Ejemplo n.º 1
0
def runModel(dfX, dfG):
    # test model
    xA = np.expand_dims(dfX.values, axis=1)
    xcA = np.expand_dims(dfG.values.astype(np.float), axis=0)
    mtdX = wqData.extractVarMtd(varX)
    x = transform.transInAll(xA, mtdX, statLst=statX)
    mtdXC = wqData.extractVarMtd(varXC)
    xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
    yOut = trainTS.testModel(model, x, xc)
    # transfer out
    nt = len(dfX)
    ny = len(varY) if varY is not None else 0
    nyc = len(varYC) if varYC is not None else 0
    yP = np.full([nt, ny + nyc], np.nan)
    yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
    yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
    # save output
    t = dfX.index.values.astype('datetime64[D]')
    colY = [] if varY is None else varY
    colYC = [] if varYC is None else varYC
    dfOut = pd.DataFrame(data=yP, columns=colY + colYC, index=t)
    dfOut.index.name = 'date'
    dfOut = dfOut.reset_index()
    return dfOut
Ejemplo n.º 2
0
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    # load master
    master = loadMaster(outName)
    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testset, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        npz = np.load(testFile, allow_pickle=True)
        yP = npz['yP']
        ycP = npz['ycP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        varTup = (master['varX'], master['varXC'], master['varY'],
                  master['varYC'])
        testDataLst = wqData.transIn(subset=testset,
                                     statTup=statTup,
                                     varTup=varTup)
        sizeLst = trainTS.getSize(testDataLst)
        testDataLst = trainTS.dealNaN(testDataLst, master['optNaN'])
        x = testDataLst[0]
        xc = testDataLst[1]
        ny = sizeLst[2]
        # test model - point by point
        yOut, ycOut = trainTS.testModel(model, x, xc, ny)
        yP = wqData.transOut(yOut, statTup[2], master['varY'])
        ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
        np.savez(testFile, yP=yP, ycP=ycP)
    return yP, ycP
Ejemplo n.º 3
0
def testModelSeq(outName,
                 siteNoLst,
                 wqData=None,
                 ep=None,
                 returnOut=False,
                 retest=False,
                 sd=np.datetime64('1979-01-01'),
                 ed=np.datetime64('2019-12-31')):
    # run sequence test for all sites, default to be from first date to last date
    if type(siteNoLst) is not list:
        siteNoLst = [siteNoLst]
    master = loadMaster(outName)
    if master['crit'] == 'SigmaLoss':
        doSigma = True
    else:
        doSigma = False
    if ep is None:
        ep = master['nEpoch']
    outDir = nameFolder(outName)
    sdS = pd.to_datetime(sd).strftime('%Y%m%d')
    edS = pd.to_datetime(ed).strftime('%Y%m%d')
    saveDir = os.path.join(outDir, 'seq-{}-{}-ep{}'.format(sdS, edS, ep))
    if not os.path.exists(saveDir):
        os.mkdir(saveDir)
    siteSaveLst = os.listdir(saveDir)
    if retest is True:
        sitePredLst = siteNoLst
    else:
        sitePredLst = [
            siteNo for siteNo in siteNoLst if siteNo not in siteSaveLst
        ]
    if len(sitePredLst) != 0:
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        (varX, varXC, varY, varYC) = (master['varX'], master['varXC'],
                                      master['varY'], master['varYC'])
        (statX, statXC, statY, statYC) = loadStat(outName)
        model = loadModel(outName, ep=ep)
        tabG = gageII.readData(varLst=varXC, siteNoLst=siteNoLst)
        tabG = gageII.updateCode(tabG)
        for siteNo in sitePredLst:
            if 'DRAIN_SQKM' in varXC:
                area = tabG[tabG.index == siteNo]['DRAIN_SQKM'].values[0]
            else:
                area = None
            # test model
            print('testing {} from {} to {}'.format(siteNo, sdS, edS))
            freq = wqData.freq
            dfX = waterQuality.readSiteTS(siteNo,
                                          varX,
                                          freq=freq,
                                          area=area,
                                          sd=sd,
                                          ed=ed)
            # dfX = waterQuality.readSiteX(
            #     siteNo, varX, sd=sd, ed=ed, area=area, nFill=5)
            xA = np.expand_dims(dfX.values, axis=1)
            xcA = np.expand_dims(tabG.loc[siteNo].values.astype(np.float),
                                 axis=0)
            mtdX = waterQuality.extractVarMtd(varX)
            x = transform.transInAll(xA, mtdX, statLst=statX)
            mtdXC = waterQuality.extractVarMtd(varXC)
            xc = transform.transInAll(xcA, mtdXC, statLst=statXC)
            [x, xc] = trainTS.dealNaN([x, xc], master['optNaN'][:2])
            yOut = trainTS.testModel(model, x, xc)
            # transfer out
            nt = len(dfX)
            ny = len(varY) if varY is not None else 0
            nyc = len(varYC) if varYC is not None else 0
            if doSigma:
                yP = np.full([nt, ny + nyc], np.nan)
                sP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny * 2:2], statY,
                                             varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny * 2::2], statYC,
                                             varYC)
                sP[:, :ny] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, 1:ny * 2:2])), statY, varY)
                sP[:, ny:] = wqData.transOut(
                    np.sqrt(np.exp(yOut[:, 0, ny * 2 + 1::2])), statYC, varYC)
            else:
                yP = np.full([nt, ny + nyc], np.nan)
                yP[:, :ny] = wqData.transOut(yOut[:, 0, :ny], statY, varY)
                yP[:, ny:] = wqData.transOut(yOut[:, 0, ny:], statYC, varYC)
            # save output
            t = dfX.index.values.astype('datetime64[D]')
            colY = [] if varY is None else varY
            colYC = [] if varYC is None else varYC
            dfOut = pd.DataFrame(data=yP, columns=[colY + colYC], index=t)
            dfOut.index.name = 'date'
            dfOut = dfOut.reset_index()
            dfOut.to_csv(os.path.join(saveDir, siteNo), index=False)
            if doSigma:
                dfOutS = pd.DataFrame(data=sP, columns=[colY + colYC], index=t)
                dfOutS.index.name = 'date'
                dfOutS = dfOut.reset_index()
                dfOutS.to_csv(os.path.join(saveDir, siteNo + '_sigma'),
                              index=False)
    # load all csv
    if returnOut:
        dictOut = dict()
        for siteNo in siteNoLst:
            # print('loading {} from {} to {}'.format(siteNo, sdS, edS))
            dfOut = pd.read_csv(os.path.join(saveDir, siteNo))
            dictOut[siteNo] = dfOut
            if doSigma:
                dfOut = pd.read_csv(os.path.join(saveDir, siteNo + '_sigma'))
                dictOut[siteNo + '_sigma'] = dfOut
        return dictOut
Ejemplo n.º 4
0
def testModel(outName, testset, wqData=None, ep=None, reTest=False):
    # load master
    master = loadMaster(outName)
    if master['crit'] == 'SigmaLoss':
        doSigma = True
    else:
        doSigma = False

    if ep is None:
        ep = master['nEpoch']
    outFolder = nameFolder(outName)
    testFileName = 'testP-{}-Ep{}.npz'.format(testset, ep)
    testFile = os.path.join(outFolder, testFileName)

    if os.path.exists(testFile) and reTest is False:
        print('load saved test result')
        npz = np.load(testFile, allow_pickle=True)
        yP = npz['yP']
        ycP = npz['ycP']
        if doSigma:
            sP = npz['sP']
            scP = npz['scP']
    else:
        statTup = loadStat(outName)
        model = loadModel(outName, ep=ep)
        # load test data
        if wqData is None:
            wqData = waterQuality.DataModelWQ(master['dataName'])
        varTup = (master['varX'], master['varXC'], master['varY'],
                  master['varYC'])
        testDataLst = wqData.transIn(subset=testset,
                                     statTup=statTup,
                                     varTup=varTup)
        sizeLst = trainTS.getSize(testDataLst)
        if master['optNaN'] == [2, 2, 0, 0]:
            master['optNaN'] = [0, 0, 0, 0]
        testDataLst = trainTS.dealNaN(testDataLst, master['optNaN'])
        x = testDataLst[0]
        xc = testDataLst[1]
        ny = sizeLst[2]
        if not doSigma:
            # test model - point by point
            yOut, ycOut = trainTS.testModel(model, x, xc, ny)
            yP = wqData.transOut(yOut, statTup[2], master['varY'])
            ycP = wqData.transOut(ycOut, statTup[3], master['varYC'])
            np.savez(testFile, yP=yP, ycP=ycP)
        else:
            print('sigma model')
            ny = ny * 2
            yOut, ycOut = trainTS.testModel(model, x, xc, ny)
            yP = wqData.transOut(yOut[:, :, ::2], statTup[2], master['varY'])
            sP = wqData.transOut(np.sqrt(np.exp(yOut[:, :, 1::2])), statTup[2],
                                 master['varY'])
            ycP = wqData.transOut(ycOut[:, ::2], statTup[3], master['varYC'])
            scP = wqData.transOut(np.sqrt(np.exp(ycOut[:, 1::2])), statTup[3],
                                  master['varYC'])
            np.savez(testFile, yP=yP, ycP=ycP, sP=sP, scP=scP)
    if doSigma:
        return yP, ycP, sP, scP
    else:
        return yP, ycP
Ejemplo n.º 5
0
    dfY = dfY.join(dfQ)
dfX = dfX.join(dfF)
dfY = dfY.join(dfC)
dfX = dfX[varX]
dfY = dfY[varY + varYC]

# normalize concat input data
dfX = dfX.interpolate(limit=nFill, limit_direction='both')
xA = np.expand_dims(dfX.values, axis=1)
xcA = np.expand_dims(tabG.loc[siteNo].values.astype(np.float), axis=0)
mtdX = wqData.extractVarMtd(varX)
x = transform.transInAll(xA, mtdX, statLst=statX)
mtdXC = wqData.extractVarMtd(varXC)
xc = transform.transInAll(xcA, mtdXC, statLst=statXC)

yP = trainTS.testModel(model, x, xc)

# # test
# nt = len(dfX)
# x, xc = trainTS.dealNaN((x, xc), dictP['optNaN'][:2])
# xx = np.concatenate([x, np.tile(xc[0, :], [1, nt, 1])], axis=-1).swapaxes(0, 1)
# xT = torch.from_numpy(xx).float()
# if torch.cuda.is_available():
#     xT = xT.cuda()
# # if i == 0 and ind1 == 0:
# #     try:
# #         yT = model(xT)
# #     except:
# #         print('first iteration failed again')
# yT = model(xT)
# yP = yT.detach().cpu().numpy()[:, 0, :]