コード例 #1
0
 def __init__(self,
              *,
              nx,
              ny,
              hiddenSize,
              ninv,
              nfea,
              hiddeninv,
              dr=0.5,
              drinv=0.5):
     """Build the two cascaded LSTMs.

     Stage 1 (``lstminv``) maps the ``ninv`` inversion inputs to ``nfea``
     learned features; stage 2 (``lstm``) consumes those features
     concatenated with the ``nx`` regular inputs to predict ``ny`` outputs.
     """
     super(CudnnInvLstmModel, self).__init__()
     # keep all sizing arguments around for later inspection
     self.nx = nx
     self.ny = ny
     self.hiddenSize = hiddenSize
     self.ninv = ninv
     self.nfea = nfea
     self.hiddeninv = hiddeninv
     # stage 1: inversion LSTM (ninv inputs -> nfea features)
     self.lstminv = rnn.CudnnLstmModel(
         nx=ninv, ny=nfea, hiddenSize=hiddeninv, dr=drinv)
     # stage 2: main LSTM fed with [features, regular inputs]
     self.lstm = rnn.CudnnLstmModel(
         nx=nfea + nx, ny=ny, hiddenSize=hiddenSize, dr=dr)
     self.gpu = 1
     self.name = "CudnnInvLstmModel"
     self.is_legacy = True
コード例 #2
0
ファイル: basinFull.py プロジェクト: fkwai/geolearn
def defineModel(dataTup, dictP):
    """Build the LSTM requested by ``dictP``, sized from ``dataTup``.

    Raises RuntimeError when ``dictP['modelName']`` is not recognized.
    """
    nx, nxc, ny, nyc, nt, ns = trainBasin.getSize(dataTup)
    if dictP['crit'] == 'SigmaLoss':
        # SigmaLoss emits an extra uncertainty output per target
        ny = ny * 2
        nyc = nyc * 2
    # define model: map the configured name onto a model class
    modelName = dictP['modelName']
    if modelName == 'CudnnLSTM':
        modelCls = rnn.CudnnLstmModel
    elif modelName == 'LstmModel':
        modelCls = rnn.LstmModel
    else:
        raise RuntimeError('Model not specified')
    return modelCls(nx=nx + nxc, ny=ny + nyc, hiddenSize=dictP['hiddenSize'])
コード例 #3
0
ファイル: basins2.py プロジェクト: sadeghst/geolearn
def trainModelTS(outName):
    """Train the time-series model defined by master file ``outName``.

    Loads and normalizes the water-quality training data, builds the
    configured model, then trains in chunks of ``saveEpoch`` epochs,
    checkpointing after each chunk and writing the per-epoch loss
    history to ``<outFolder>/loss.csv``.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    wqData = waterQuality.DataModelWQ(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'LstmModel':
        model = rnn.LstmModel(nx=nx + nxc,
                              ny=ny + nyc,
                              hiddenSize=dictP['hiddenSize'])
    else:
        # Bug fix: the original fell through with `model` unbound and
        # crashed later at model.cuda() with UnboundLocalError; fail
        # fast like the sibling trainModelTS in basins.py instead.
        raise RuntimeError('Model not specified')
    lossFun = crit.RmseLoss()
    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()
    optim = torch.optim.Adadelta(model.parameters())
    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start each run with a fresh log file
    if os.path.exists(logFile):
        os.remove(logFile)
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainTS.trainModel(dataTup,
                                                  model,
                                                  lossFun,
                                                  optim,
                                                  batchSize=dictP['batchSize'],
                                                  nEp=sEp,
                                                  cEp=k,
                                                  logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
コード例 #4
0
ファイル: test.py プロジェクト: sadeghst/geolearn
doLst.append('train')
# doLst.append('test')
# doLst.append('post')

# stage toggles: only the stages named in doLst run below
if 'train' in doLst:
    # load data
    df = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
                                         subset='CONUSv4f1',
                                         tRange=ty1)
    # forcings/attributes are normalized with NaN removed; the target keeps
    # its NaNs (rmNan=False) -- presumably masked during training, confirm
    x = df.getDataTs(dbCsv.varForcing, doNorm=True, rmNan=True)
    c = df.getDataConst(dbCsv.varConst, doNorm=True, rmNan=True)
    y = df.getDataTs('SMAP_AM', doNorm=True, rmNan=False)
    # model input width = time-series variables + constant attributes
    nx = x.shape[-1] + c.shape[-1]
    ny = 1

    model = rnn.CudnnLstmModel(nx=nx, ny=ny, hiddenSize=64)
    lossFun = crit.RmseLoss()
    model = train.trainModel(model,
                             x,
                             y,
                             c,
                             lossFun,
                             nEpoch=nEpoch,
                             miniBatch=[100, 30])
    modelName = 'test-LSTM'
    train.saveModel(outFolder, model, nEpoch, modelName=modelName)

    for k in dLst:
        sd = utils.time.t2dt(ty1[0]) - dt.timedelta(days=k)
        ed = utils.time.t2dt(ty1[1]) - dt.timedelta(days=k)
        df2 = hydroDL.data.dbCsv.DataframeCsv(rootDB=rootDB,
コード例 #5
0
ファイル: basins.py プロジェクト: sadeghst/geolearn
def trainModelTS(outName):
    """Train the time-series water-quality model defined by master file ``outName``.

    Loads and normalizes the training data, builds the configured loss and
    model, then trains in chunks of ``saveEpoch`` epochs, checkpointing
    after each chunk and writing the loss history to ``<outFolder>/loss.csv``.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    # rmFlag is optional in older master files; default to False
    rmFlag = dictP['rmFlag'] if 'rmFlag' in dictP else False
    wqData = waterQuality.DataModelWQ(dictP['dataName'], rmFlag)
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup, statTup = wqData.transIn(subset=dictP['trainName'], varTup=varTup)
    dataTup = trainTS.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
    # define loss
    if dictP['crit'] == 'RmseLoss':
        lossFun = crit.RmseLoss()
    elif dictP['crit'] == 'RmseLoss2D':
        lossFun = crit.RmseLoss2D()
    elif dictP['crit'] == 'SigmaLoss':
        lossFun = crit.SigmaLoss()
        # SigmaLoss emits an extra uncertainty output per target
        ny = ny * 2
        nyc = nyc * 2
    else:
        raise RuntimeError('loss function not specified')
    # define model
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'LstmModel':
        model = rnn.LstmModel(nx=nx + nxc,
                              ny=ny + nyc,
                              hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'AgeLSTM':
        model = rnn.AgeLSTM2(nx=nx + nxc,
                             ny=ny,
                             nyc=nyc,
                             rho=365,
                             nh=dictP['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        raise RuntimeError('optimizor function not specified')

    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start each run with a fresh log file
    if os.path.exists(logFile):
        os.remove(logFile)
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainTS.trainModel(dataTup,
                                                  model,
                                                  lossFun,
                                                  optim,
                                                  batchSize=dictP['batchSize'],
                                                  nEp=sEp,
                                                  cEp=k,
                                                  logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
コード例 #6
0
ファイル: master.py プロジェクト: mhpi/hydroDL
def train(mDict):
    """Train a hydroDL model described by a master dict (or master file path).

    Seeds all RNGs for reproducibility, loads the data, instantiates the
    loss and model classes named in the options (updating ``nx``/``ny``
    from the loaded data), then hands off to ``trainModel``.  ``out`` is
    the folder where the master file and checkpoints are written.
    """
    # Bug fix: the original `if mDict is str` compared the argument to the
    # `str` type object itself (always False), so a path was never loaded.
    if isinstance(mDict, str):
        mDict = readMasterFile(mDict)
    out = mDict["out"]
    optData = mDict["data"]
    optModel = mDict["model"]
    optLoss = mDict["loss"]
    optTrain = mDict["train"]

    # fix the random seed
    if optTrain["seed"] is None:
        # generate random seed
        randomseed = int(np.random.uniform(low=0, high=1e6))
        optTrain["seed"] = randomseed
        print("random seed updated!")
    else:
        randomseed = optTrain["seed"]

    random.seed(randomseed)
    torch.manual_seed(randomseed)
    np.random.seed(randomseed)
    torch.cuda.manual_seed(randomseed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # data
    df, x, y, c = loadData(optData)
    # x: ngage*nday*nvar
    # y: ngage*nday*nvar
    # c: ngage*nvar
    # temporal test, fill obs nan using LSTM forecast
    # temp = x[:,:,-1, None]
    # y[np.isnan(y)] = temp[np.isnan(y)]

    # x may be a (forcing, obs) tuple; count input variables accordingly
    if c is None:
        if type(x) is tuple:
            nx = x[0].shape[-1]
        else:
            nx = x.shape[-1]
    else:
        if type(x) is tuple:
            nx = x[0].shape[-1] + c.shape[-1]
        else:
            nx = x.shape[-1] + c.shape[-1]
    ny = y.shape[-1]

    # loss
    # NOTE(review): eval() on a config string executes arbitrary code; safe
    # only while master files are trusted input.
    if eval(optLoss["name"]) is hydroDL.model.crit.SigmaLoss:
        lossFun = crit.SigmaLoss(prior=optLoss["prior"])
        # SigmaLoss emits (mean, sigma) per target, doubling ny
        optModel["ny"] = ny * 2
    elif eval(optLoss["name"]) is hydroDL.model.crit.RmseLoss:
        lossFun = crit.RmseLoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.NSELoss:
        lossFun = crit.NSELoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.NSELosstest:
        lossFun = crit.NSELosstest()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.MSELoss:
        lossFun = crit.MSELoss()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.RmseLossCNN:
        lossFun = crit.RmseLossCNN()
        optModel["ny"] = ny
    elif eval(optLoss["name"]) is hydroDL.model.crit.ModifyTrend1:
        lossFun = crit.ModifyTrend1()
        optModel["ny"] = ny
    else:
        # previously an unknown loss left lossFun unbound (NameError later)
        raise RuntimeError("loss function not specified")

    # model
    if optModel["nx"] != nx:
        print("updated nx by input data")
        optModel["nx"] = nx
    if eval(optModel["name"]) is hydroDL.model.rnn.CudnnLstmModel:
        if type(x) is tuple:
            # concatenate forcing and observation along the variable axis
            x = np.concatenate([x[0], x[1]], axis=2)
            if c is None:
                nx = x.shape[-1]
            else:
                nx = x.shape[-1] + c.shape[-1]
            optModel["nx"] = nx
            print("Concatenate input and obs, update nx by obs")
        model = rnn.CudnnLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.CpuLstmModel:
        model = rnn.CpuLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.LstmCloseModel:
        model = rnn.LstmCloseModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            fillObs=True,
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.AnnModel:
        # NOTE(review): builds AnnCloseModel even though the configured name
        # is AnnModel -- confirm this aliasing is intended.
        model = rnn.AnnCloseModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.AnnCloseModel:
        model = rnn.AnnCloseModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            fillObs=True,
        )
    elif eval(optModel["name"]) is hydroDL.model.cnn.LstmCnn1d:
        convpara = optModel["convNKSP"]
        model = hydroDL.model.cnn.LstmCnn1d(
            nx=optModel["nx"],
            ny=optModel["ny"],
            rho=optModel["rho"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            padding=convpara[3],
        )
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLSTMmodel:
        daobsOption = optData["daObs"]
        if type(daobsOption) is list:
            if len(daobsOption) - 3 >= 7:
                # using 1dcnn only when number of obs larger than 7
                optModel["nobs"] = len(daobsOption)
                convpara = optModel["convNKS"]
                model = rnn.CNN1dLSTMmodel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    nobs=optModel["nobs"] - 3,
                    hiddenSize=optModel["hiddenSize"],
                    nkernel=convpara[0],
                    kernelSize=convpara[1],
                    stride=convpara[2],
                    poolOpt=optModel["poolOpt"],
                )
                print("CNN1d Kernel is used!")
            else:
                # too few observations: fall back to a plain CudnnLstmModel
                if type(x) is tuple:
                    x = np.concatenate([x[0], x[1]], axis=2)
                    nx = x.shape[-1] + c.shape[-1]
                    optModel["nx"] = nx
                    print("Concatenate input and obs, update nx by obs")
                model = rnn.CudnnLstmModel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    hiddenSize=optModel["hiddenSize"],
                )
                optModel["name"] = "hydroDL.model.rnn.CudnnLstmModel"
                print("Too few obserservations, not using cnn kernel")
        else:
            raise Exception("CNN kernel used but daobs option is not obs list")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLSTMInmodel:
        # daobsOption = optData['daObs']
        daobsOption = list(range(24))
        if type(daobsOption) is list:
            if len(daobsOption) - 3 >= 7:
                # using 1dcnn only when number of obs larger than 7
                optModel["nobs"] = len(daobsOption)
                convpara = optModel["convNKS"]
                model = rnn.CNN1dLSTMInmodel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    # nobs=optModel['nobs']-3,
                    nobs=24,  # temporary test
                    hiddenSize=optModel["hiddenSize"],
                    nkernel=convpara[0],
                    kernelSize=convpara[1],
                    stride=convpara[2],
                    poolOpt=optModel["poolOpt"],
                )
                print("CNN1d Kernel is used!")
            else:
                # too few observations: fall back to a plain CudnnLstmModel
                if type(x) is tuple:
                    x = np.concatenate([x[0], x[1]], axis=2)
                    nx = x.shape[-1] + c.shape[-1]
                    optModel["nx"] = nx
                    print("Concatenate input and obs, update nx by obs")
                model = rnn.CudnnLstmModel(
                    nx=optModel["nx"],
                    ny=optModel["ny"],
                    hiddenSize=optModel["hiddenSize"],
                )
                optModel["name"] = "hydroDL.model.rnn.CudnnLstmModel"
                print("Too few obserservations, not using cnn kernel")
        else:
            raise Exception("CNN kernel used but daobs option is not obs list")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLCmodel:
        # LCrange = optData['lckernel']
        # tLCLst = utils.time.tRange2Array(LCrange)
        if len(x[1].shape) == 2:
            # for LC-FDC
            optModel["nobs"] = x[1].shape[-1]
        elif len(x[1].shape) == 3:
            # for LC-SMAP--get time step
            optModel["nobs"] = x[1].shape[1]
        convpara = optModel["convNKS"]
        model = rnn.CNN1dLCmodel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            nobs=optModel["nobs"],
            hiddenSize=optModel["hiddenSize"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            poolOpt=optModel["poolOpt"],
        )
        print("CNN1d Local calibartion Kernel is used!")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CNN1dLCInmodel:
        LCrange = optData["lckernel"]
        tLCLst = utils.time.tRange2Array(LCrange)
        optModel["nobs"] = x[1].shape[-1]
        convpara = optModel["convNKS"]
        model = rnn.CNN1dLCInmodel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            nobs=optModel["nobs"],
            hiddenSize=optModel["hiddenSize"],
            nkernel=convpara[0],
            kernelSize=convpara[1],
            stride=convpara[2],
            poolOpt=optModel["poolOpt"],
        )
        print("CNN1d Local calibartion Kernel is used!")
    elif eval(optModel["name"]) is hydroDL.model.rnn.CudnnInvLstmModel:
        # optModel['ninv'] = x[1].shape[-1]
        optModel["ninv"] = x[1].shape[-1] + c.shape[-1]  # Test the inv using attributes
        model = rnn.CudnnInvLstmModel(
            nx=optModel["nx"],
            ny=optModel["ny"],
            hiddenSize=optModel["hiddenSize"],
            ninv=optModel["ninv"],
            nfea=optModel["nfea"],
            hiddeninv=optModel["hiddeninv"],
        )
        print("LSTMInv model is used!")
    else:
        # previously an unknown model left `model` unbound (NameError later)
        raise RuntimeError("Model not specified")
    # train
    if optTrain["saveEpoch"] > optTrain["nEpoch"]:
        # never checkpoint less often than the total epoch count
        optTrain["saveEpoch"] = optTrain["nEpoch"]

    # train model
    writeMasterFile(mDict)
    model = trainModel(
        model,
        x,
        y,
        c,
        lossFun,
        nEpoch=optTrain["nEpoch"],
        miniBatch=optTrain["miniBatch"],
        saveEpoch=optTrain["saveEpoch"],
        saveFolder=out,
    )
コード例 #7
0
matX = dfXN.values
matY = dfYN.values

# NOTE(review): matX/matY are assigned above but matX1/matY1 are consumed
# below -- presumably defined earlier in the file; confirm.
nx = len(varX)
ny = len(varY)
# indices of non-NaN targets, requiring at least a year of history before each
ind1 = np.where(~np.isnan(matY1))[0]
ind1 = ind1[ind1 > 365]
ind2 = np.where(~np.isnan(matY2))[0]
rho = 365
rhoF = 365
nh = 256
ns = 10

importlib.reload(rnn)
model = rnn.AgeLSTM(nx=nx, nh=nh, rho=rho).cuda()
model2 = rnn.CudnnLstmModel(nx=nx, ny=1, hiddenSize=nh).cuda()
optim = torch.optim.Adadelta(model.parameters())
optim2 = torch.optim.Adadelta(model2.parameters())
lossFun = crit.RmseLoss().cuda()
# train
model.train()
for i in range(100):
    t0 = time.time()
    # np.ndarray() allocates uninitialized buffers; every slot is
    # overwritten by the sampling loop below
    x = np.ndarray([rho + rhoF, ns, nx])
    y = np.ndarray([rhoF, ns, ny])
    for k in range(ns):
        # sample a random valid end index; inputs get rho+rhoF history,
        # targets only the last rhoF steps
        ind = ind1[np.random.randint(len(ind1))]
        x[:, k, :] = matX1[ind - rho - rhoF:ind, :]
        y[:, k, :] = matY1[ind - rhoF:ind, :]
    xx = torch.from_numpy(x).float().cuda()
    yy = torch.from_numpy(y).float().cuda()
コード例 #8
0
    dfz2 = camels.DataframeCamels(subset='all', tRange=[20041225, 20091225])
    z2 = dfz2.getDataObs(doNorm=True, rmNan=True)
    # z2 = interp.interpNan1d(z2, mode='pre')
    xz2 = np.concatenate([x1, z2], axis=2)

    ny = 1
    nx = x1.shape[-1] + c1.shape[-1]
    lossFun = crit.RmseLoss()

    # model1 = rnn.CudnnLstmModel(nx=nx, ny=ny, hiddenSize=64)
    # model1 = train.trainModel(
    #     model1, x1, y1, c1, lossFun, nEpoch=nEpoch, miniBatch=(50, 365))
    # train.saveModel(outFolder, model1, nEpoch, modelName='LSTM')

    model2 = rnn.CudnnLstmModel(nx=nx + 1, ny=ny, hiddenSize=64)
    model2 = train.trainModel(
        model2, xz1, y1, c1, lossFun, nEpoch=nEpoch, miniBatch=(50, 365))
    train.saveModel(outFolder, model2, nEpoch, modelName='DA-1')

    model3 = rnn.CudnnLstmModel(nx=nx + 1, ny=ny, hiddenSize=64)
    model3 = train.trainModel(
        model3, xz2, y1, c1, lossFun, nEpoch=nEpoch, miniBatch=(50, 365))
    train.saveModel(outFolder, model3, nEpoch, modelName='DA-7')

if 'test' in doLst:
    df2 = camels.DataframeCamels(subset='all', tRange=[20050101, 20150101])
    x2 = df2.getDataTS(varLst=camels.forcingLst, doNorm=True, rmNan=True)
    c2 = df2.getDataConst(varLst=camels.attrLstSel, doNorm=True, rmNan=True)
    yt2 = df2.getDataObs(doNorm=False, rmNan=False).squeeze()
コード例 #9
0
ファイル: modelSave.py プロジェクト: fkwai/geolearn
outName = 'weathering-FPR2QC-t365-B10'
ep = 100

# save
# convert a pickled full-model checkpoint into a state_dict file
outFolder = basinFull.nameFolder(outName)
modelFile = os.path.join(outFolder, 'model_ep{}'.format(ep))
model = torch.load(modelFile)
modelStateFile = os.path.join(outFolder, 'modelState_ep{}'.format(ep))
torch.save(model.state_dict(), modelStateFile)

# load
# rebuild the architecture from the master file, then restore the weights
dictP = basinFull.loadMaster(outName)
DF = dbBasin.DataFrameBasin(dictP['dataName'])
dictVar = {k: dictP[k] for k in ('varX', 'varXC', 'varY', 'varYC')}
DM = dbBasin.DataModelBasin(DF, subset='A10', **dictVar)
DM.loadStat(outFolder)
dataTup = DM.getData()
[nx, nxc, ny, nyc, nt, ns] = trainBasin.getSize(dataTup)
dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
if dictP['modelName'] == 'CudnnLSTM':
    model = rnn.CudnnLstmModel(nx=nx + nxc,
                               ny=ny + nyc,
                               hiddenSize=dictP['hiddenSize'])
elif dictP['modelName'] == 'LstmModel':
    model = rnn.LstmModel(nx=nx + nxc,
                          ny=ny + nyc,
                          hiddenSize=dictP['hiddenSize'])
else:
    raise RuntimeError('Model not specified')
model.load_state_dict(torch.load(modelStateFile))
コード例 #10
0
ファイル: modelC.py プロジェクト: sadeghst/geolearn
    cTemp = np.tile(c[iR, :], [nt, 1, 1])
    xTemp = x[nt - rho:rho, iR, :]
    xTensor = torch.from_numpy(np.concatenate([xTemp, cTemp], axis=-1)).float()
    if torch.cuda.is_available():
        xTensor = xTensor.cuda()
        yTensor = yTensor.cuda()
    return xTensor, yTensor


# model
# resume from a saved checkpoint when requested, otherwise build a fresh LSTM
if resumeEpoch != 0:
    modelFile = os.path.join(modelFolder,
                             'model_Ep' + str(resumeEpoch) + '.pt')
    model = torch.load(modelFile)
else:
    model = rnn.CudnnLstmModel(nx=nx + nc, ny=ny, hiddenSize=hiddenSize)
lossFun = crit.RmseEnd()
if torch.cuda.is_available():
    lossFun = lossFun.cuda()
    model = model.cuda()
optim = torch.optim.Adadelta(model.parameters())

# training
# iterations per epoch chosen so the chance a given sample is never drawn
# is at most 1%: (1 - batchSize/nd)^nIterEp <= 0.01
if batchSize > nd:
    nIterEp = 1
else:
    nIterEp = int(np.ceil(np.log(0.01) / np.log(1 - batchSize / nd)))
lossEp = 0
lossEpLst = list()
t0 = time.time()
model.train()
コード例 #11
0
        # make cuDNN runs reproducible for the chosen seed
        torch.cuda.manual_seed(randomseed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        # load data
        df, x, y, c = master.loadData(
            optData, TempTarget, forcing_path, attr_path, out
        )  # df: CAMELS dataframe; x: forcings; y: streamflow obs; c:attributes
        # main outputs of this step are numpy ndArrays: x[nb,nt,nx], y[nb,nt, ny], c[nb,nc]
        # nb: number of basins, nt: number of time steps (in Ttrain), nx: number of time-dependent forcing variables
        # ny: number of target variables, nc: number of constant attributes
        nx = x.shape[-1] + c.shape[-1]  # update nx, nx = nx + nc
        ny = y.shape[-1]

        # pick the cuDNN-backed model on GPU, the CPU LSTM otherwise
        if torch.cuda.is_available():
            model = rnn.CudnnLstmModel(nx=nx, ny=ny, hiddenSize=HIDDENSIZE)
        else:
            model = rnn.CpuLstmModel(nx=nx, ny=ny, hiddenSize=HIDDENSIZE)

        optModel = default.update(optModel, nx=nx, ny=ny)
        # the loaded model should be consistent with the 'name' in optModel Dict above for logging purpose
        lossFun = crit.RmseLoss()
        # the loaded loss should be consistent with the 'name' in optLoss Dict above for logging purpose
        # update and write the dictionary variable to out folder for logging and future testing
        masterDict = master.wrapMaster(out, optData, optModel, optLoss,
                                       optTrain)
        master.writeMasterFile(masterDict)
        # train model

        out1 = out
コード例 #12
0
[nx, nxc, ny, nyc, nt, ns] = trainTS.getSize(dataTup)
# merge the time-series inputs and tiled constants into one [ns, nt, nx+nxc] array
xx = np.zeros([ns, nt, nx + nxc])
for k in range(ns):
    xTemp = dataTup[0][:, k, :]
    xcTemp = dataTup[1][k, :]
    # NOTE(review): tiles constants 365 times -- assumes nt == 365; confirm
    temp = np.concatenate([xTemp, np.tile(xcTemp, [365, 1])], axis=-1)
    xx[k, :, :] = temp
xT = torch.from_numpy(xx).float().cuda()
# swap to sample-first layout to match xx (presumably [nt,ns,ny] -> [ns,nt,ny])
yy = np.swapaxes(dataTup[2], 0, 1)
yT = torch.from_numpy(yy).float().cuda()

# xT = xT[0:1, :, :]
# yT = yT[0:1, :, :]

# train model
model = rnn.CudnnLstmModel(nx=nx + nxc, ny=ny + nyc, hiddenSize=128)
lossFun = crit.RmseLoss()
lossFun = lossFun.cuda()
model = model.cuda()
optim = torch.optim.Adadelta(model.parameters())
# optim = torch.optim.SGD(model.parameters(), lr=0.01)
lossLst = list()

# backup - subset only spatial
# iterations so the chance a sample is never drawn is at most 1%
nbatch = 500
iterEp = int(np.ceil(np.log(0.01) / np.log(1 - nbatch / ns)))

for i in range(100):
    nbatch = 500
    iS = np.random.randint(0, ns, nbatch)
    xT = torch.from_numpy(xx[iS, :, :]).float().cuda()
コード例 #13
0
def trainModel(outName):
    """Train the model defined by master file ``outName`` on DataModelFull data.

    Extracts the configured subset/date range, normalizes it (optionally
    borrowing normalization statistics from another run), then trains in
    chunks of ``saveEpoch`` epochs with a checkpoint after each chunk and
    writes the loss history to ``<outFolder>/loss.csv``.
    """
    outFolder = nameFolder(outName)
    dictP = loadMaster(outName)

    # load data
    DM = dbBasin.DataModelFull(dictP['dataName'])
    varTup = (dictP['varX'], dictP['varXC'], dictP['varY'], dictP['varYC'])
    dataTup = DM.extractData(varTup, dictP['subset'], dictP['sd'], dictP['ed'])
    if dictP['borrowStat'] is None:
        dataTup, statTup = DM.transIn(dataTup, varTup)
    else:
        # reuse normalization statistics from a previous run
        statTup = loadStat(dictP['borrowStat'])
        dataTup = DM.transIn(dataTup, varTup, statTup=statTup)
    dataTup = trainBasin.dealNaN(dataTup, dictP['optNaN'])
    wrapStat(outName, statTup)

    # train model
    [nx, nxc, ny, nyc, nt, ns] = trainBasin.getSize(dataTup)
    # define loss
    lossFun = getattr(crit, dictP['crit'])()
    if dictP['crit'] == 'SigmaLoss':
        # SigmaLoss emits an extra uncertainty output per target
        ny = ny * 2
        nyc = nyc * 2
    # define model
    if dictP['modelName'] == 'CudnnLSTM':
        model = rnn.CudnnLstmModel(nx=nx + nxc,
                                   ny=ny + nyc,
                                   hiddenSize=dictP['hiddenSize'])
    elif dictP['modelName'] == 'LstmModel':
        model = rnn.LstmModel(nx=nx + nxc,
                              ny=ny + nyc,
                              hiddenSize=dictP['hiddenSize'])
    else:
        raise RuntimeError('Model not specified')

    if torch.cuda.is_available():
        lossFun = lossFun.cuda()
        model = model.cuda()

    if dictP['optim'] == 'AdaDelta':
        optim = torch.optim.Adadelta(model.parameters())
    else:
        raise RuntimeError('optimizor function not specified')

    lossLst = list()
    nEp = dictP['nEpoch']
    sEp = dictP['saveEpoch']
    logFile = os.path.join(outFolder, 'log')
    # start each run with a fresh log file
    if os.path.exists(logFile):
        os.remove(logFile)
    for k in range(0, nEp, sEp):
        model, optim, lossEp = trainBasin.trainModel(
            dataTup,
            model,
            lossFun,
            optim,
            batchSize=dictP['batchSize'],
            nEp=sEp,
            cEp=k,
            logFile=logFile)
        # save model
        saveModel(outName, k + sEp, model, optim=optim)
        lossLst = lossLst + lossEp

    lossFile = os.path.join(outFolder, 'loss.csv')
    pd.DataFrame(lossLst).to_csv(lossFile, index=False, header=False)
コード例 #14
0
    # create the output folder on first use
    if not os.path.isdir(out):
        os.makedirs(out)
    # log training gage information
    gageFile = os.path.join(out, "gage.json")
    with open(gageFile, "w") as fp:
        json.dump(gageDic, fp, indent=4)
    # define model config
    optModel = default.update(
        default.optLstm, name="hydroDL.model.rnn.CudnnLstmModel", hiddenSize=HIDDENSIZE
    )

    if interfaceOpt == 1:
        # define, load and train model
        optModel = default.update(optModel, nx=Nx, ny=Ny)
        model = rnn.CudnnLstmModel(
            nx=optModel["nx"], ny=optModel["ny"], hiddenSize=optModel["hiddenSize"]
        )
        # Wrap up all the training configurations to one dictionary in order to save into "out" folder
        masterDict = master.wrapMaster(out, optData, optModel, optLoss, optTrain)
        master.writeMasterFile(masterDict)
        # log statistics
        statFile = os.path.join(out, "statDict.json")
        with open(statFile, "w") as fp:
            json.dump(statDict, fp, indent=4)
        # Train the model
        trainedModel = train.trainModel(
            model,
            xTrain,
            yTrain,
            attrs,
            lossFun,