Esempio n. 1
0
def readSiteY(siteNo,
              varY,
              area=None,
              sd=np.datetime64('1979-01-01'),
              ed=np.datetime64('2020-01-01')):
    """Read daily target variables for one USGS site over [sd, ed].

    Joins water-quality samples (values and flags) and, when requested,
    streamflow / area-normalized runoff onto a full daily date index,
    returning a DataFrame whose columns follow the order of varY.
    """
    dfY = pd.DataFrame({'date': pd.date_range(sd, ed)}).set_index('date')
    # water-quality sample codes requested in varY
    sampleCodes = [c for c in varY if c in usgs.codeLst]
    dfC, dfCF = usgs.readSample(siteNo,
                                codeLst=sampleCodes,
                                startDate=sd,
                                flag=True)
    if '00060' in varY or 'runoff' in varY:
        dfQ = usgs.readStreamflow(siteNo, startDate=sd).rename(
            columns={'00060_00003': '00060'})
        if 'runoff' in varY:
            if area is None:
                # fall back to the gageII drainage area for this site
                tabArea = gageII.readData(varLst=['DRAIN_SQKM'],
                                          siteNoLst=[siteNo])
                area = tabArea['DRAIN_SQKM'].values[0]
            dfQ['runoff'] = calRunoffArea(dfQ['00060'], area)
        dfY = dfY.join(dfQ)
    return dfY.join(dfC).join(dfCF)[varY]
Esempio n. 2
0
def readSiteTS(siteNo,
               varLst,
               freq='D',
               area=None,
               sd=np.datetime64('1979-01-01'),
               ed=np.datetime64('2019-12-31'),
               rmFlag=True):
    """Read a merged time-series table of requested variables for one site.

    Each variable in varLst is routed to its source dataset (USGS samples,
    USGS streamflow, gridMET forcing, NTN deposition, GLASS remote sensing,
    or computed time variables) and joined on a daily date index over
    [sd, ed]. The output columns follow the order of varLst.

    Arguments:
        siteNo {str} -- USGS site number
        varLst {list} -- variable names to read
    Keyword Arguments:
        freq {str} -- 'D' for daily, 'W' for weekly means (weeks end Tuesday)
        area {float} -- drainage area for runoff; looked up from gageII
            when None
        sd, ed {np.datetime64} -- start / end dates
        rmFlag {bool} -- if True, remove flagged water-quality samples
    Raises:
        ValueError -- if freq is neither 'D' nor 'W' (previously the
            function fell off the end and returned None silently)
    """
    # read data
    td = pd.date_range(sd, ed)
    # route each requested variable to its source dataset
    varC = list(set(varLst).intersection(usgs.varC))
    varQ = list(set(varLst).intersection(usgs.varQ))
    varF = list(set(varLst).intersection(gridMET.varLst))
    varP = list(set(varLst).intersection(ntn.varLst))
    varR = list(set(varLst).intersection(GLASS.varLst))
    varT = list(set(varLst).intersection(varTLst))

    dfD = pd.DataFrame({'date': td}).set_index('date')
    if len(varC) > 0:
        if rmFlag:
            dfC, dfCF = usgs.readSample(siteNo,
                                        codeLst=varC,
                                        startDate=sd,
                                        flag=2)
            dfC = usgs.removeFlag(dfC, dfCF)
        else:
            dfC = usgs.readSample(siteNo, codeLst=varC, startDate=sd)
        dfD = dfD.join(dfC)
    if len(varQ) > 0:
        dfQ = usgs.readStreamflow(siteNo, startDate=sd)
        dfQ = dfQ.rename(columns={'00060_00003': '00060'})
        if 'runoff' in varLst:
            if area is None:
                # look up basin drainage area when not supplied
                tabArea = gageII.readData(varLst=['DRAIN_SQKM'],
                                          siteNoLst=[siteNo])
                area = tabArea['DRAIN_SQKM'].values[0]
            dfQ['runoff'] = calRunoffArea(dfQ['00060'], area)
        dfD = dfD.join(dfQ)
    if len(varF) > 0:
        dfF = gridMET.readBasin(siteNo, varLst=varF)
        dfD = dfD.join(dfF)
    if len(varP) > 0:
        dfP = ntn.readBasin(siteNo, varLst=varP, freq='D')
        dfD = dfD.join(dfP)
    if len(varR) > 0:
        dfR = GLASS.readBasin(siteNo, varLst=varR, freq='D')
        dfD = dfD.join(dfR)
    if len(varT) > 0:
        # time variables are computed from the date index itself
        t = dfD.index.values
        matT, _ = calT(t)
        dfT = pd.DataFrame(index=t, columns=varTLst, data=matT)
        dfD = dfD.join(dfT[varT])
    # the set intersections above lose order; restore the caller's order
    dfD = dfD[varLst]
    if freq == 'D':
        return dfD
    elif freq == 'W':
        return dfD.resample('W-TUE').mean()
    else:
        raise ValueError("freq must be 'D' or 'W', got {!r}".format(freq))
Esempio n. 3
0
def funcPoint(iP, axes):
    """Plot diagnostics for the iP-th site: normalized time series of
    streamflow and water-quality codes, a log C-Q scatter per code, and a
    Lomb-Scargle power spectrum per code."""
    kA = 0
    siteNo = siteNoLst[iP]
    # pd.datetime was deprecated and removed from pandas; use pd.Timestamp
    startDate = pd.Timestamp(1979, 1, 1)
    endDate = pd.Timestamp(2019, 12, 31)
    ctR = pd.date_range(startDate, endDate)
    dfData = pd.DataFrame({'date': ctR}).set_index('date')
    dfC = usgs.readSample(siteNo, codeLst=codeLst, startDate=startDate)
    dfQ = usgs.readStreamflow(siteNo, startDate=startDate)
    dfQ = dfQ.rename(columns={'00060_00003': '00060'})
    dfData = dfData.join(dfQ)
    dfData = dfData.join(dfC)

    # plot normalized time series
    ax = axes[kA]
    kA = kA + 1
    t = dfData.index.values
    dfDataN = (dfData - dfData.mean()) / dfData.std()
    varLst = dfData.columns.tolist()
    data = [dfDataN[var].values for var in varLst]
    legLst = ['streamflow'
              ] + [usgs.codePdf.loc[code]['shortName'] for code in codeLst]
    axplot.plotTS(ax, t, data, styLst='-***', cLst='krgb', legLst=legLst)

    # plot C-Q
    nc = len(codeLst)
    for k in range(nc):
        code = codeLst[k]
        q = dfData['00060']
        c = dfData[code]
        [q, c], ind = utils.rmNan([q, c])
        ax = axes[kA]
        kA = kA + 1
        ax.plot(np.log(q), np.log(c), 'r*')

    # plot fractal behaviour (Lomb-Scargle spectrum)
    for k in range(nc):
        code = codeLst[k]
        dfV = dfData[dfData[code].notna()]
        nt = len(dfData)
        # NOTE(review): lombscargle expects float sample times; passing
        # datetime64 values relies on implicit casting -- confirm against
        # the installed scipy version
        x = dfV.index.values.astype('datetime64[D]')
        y = dfV[code].values
        freq = 2 * np.pi / np.linspace(2, nt, nt)
        power = signal.lombscargle(x, y, freq)
        ax = axes[kA]
        kA = kA + 1
        # BUG FIX: was `freq / 2 * np.pi` (== freq*pi/2); the intended
        # cycles-per-day axis is angular frequency over 2*pi
        ax.plot(np.log(freq / (2 * np.pi)), np.log(power), '-*')
        fyr = 2 * np.pi / 365
        pyr = signal.lombscargle(x, y, [fyr])
        ax.plot(np.log(fyr / (2 * np.pi)), np.log(pyr), 'r*')
Esempio n. 4
0
def funcPoint(iP, axP):
    """Plot streamflow, concentration time series, and the fitted C-Q
    curve ceq / (q/dw + 1) for the iP-th HBN site."""
    siteNo = siteNoHBN[iP]
    dfC = usgs.readSample(siteNo, codeLst=usgs.codeLst)
    dfQ = usgs.readStreamflow(siteNo)
    dfJoin = dfC.join(dfQ)
    tAll = dfJoin.index.values
    # convert raw streamflow to area-normalized units
    flow = dfJoin['00060_00003'].values / area * unitConv
    conc = dfJoin[code].values
    [flow, conc], ind = utils.rmNan([flow, conc])
    tObs = tAll[ind]
    # full streamflow record on the first panel
    axplot.plotTS(axP[0], dfQ.index.values, dfQ['00060_00003'].values,
                  cLst='b', styLst='--')
    axplot.plotTS(axP[1], tObs, conc)
    axP[2].plot(np.log(flow), conc, 'k*')
    # log-spaced flow grid spanning the positive observed range
    qLo = np.log10(np.min(flow[flow > 0]))
    qHi = np.log10(np.max(flow[~np.isnan(flow)]))
    qGrid = 10**np.linspace(qLo, qHi, 20)
    ceq0 = pMat2[iP, 0]
    dw0 = pMat2[iP, 1]
    cFit = ceq0 * 1 / (qGrid / dw0 + 1)
    axP[2].plot(np.log(qGrid), cFit, 'r-')
    axP[2].set_title('ceq={:.3f},dw={:.3f}'.format(ceq0, dw0))
Esempio n. 5
0
def funcPoint(iP, axes):
    """Plot, for the iP-th site: normalized time series of streamflow and
    water-quality codes, then per-code log C-Q scatter of observations
    (red) against the fitted Kate model (blue)."""
    kA = 0
    siteNo = siteNoLst[iP]
    # pd.datetime was deprecated and removed from pandas; use pd.Timestamp
    startDate = pd.Timestamp(1979, 1, 1)
    endDate = pd.Timestamp(2019, 12, 31)
    ctR = pd.date_range(startDate, endDate)
    dfData = pd.DataFrame({'date': ctR}).set_index('date')
    dfC = usgs.readSample(siteNo, codeLst=codeLst, startDate=startDate)
    dfQ = usgs.readStreamflow(siteNo, startDate=startDate)
    dfQ = dfQ.rename(columns={'00060_00003': '00060'})
    dfData = dfData.join(dfQ)
    dfData = dfData.join(dfC)

    # plot normalized time series
    ax = axes[kA]
    kA = kA + 1
    t = dfData.index.values
    dfDataN = (dfData - dfData.mean()) / dfData.std()
    varLst = dfData.columns.tolist()
    data = [dfDataN[var].values for var in varLst]
    legLst = ['streamflow'
              ] + [usgs.codePdf.loc[code]['shortName'] for code in codeLst]
    axplot.plotTS(ax, t, data, styLst='-***', cLst='krgb', legLst=legLst)
    ax.set_title(siteNo)

    # plot C-Q: observations vs the Kate model fitted on the same data
    nc = len(codeLst)
    for k in range(nc):
        code = codeLst[k]
        q = dfData['00060']
        c = dfData[code]
        [q, c], ind = utils.rmNan([q, c])
        ceq, dw, y = wqRela.kateModel(q, c, q)
        ax = axes[kA]
        kA = kA + 1
        ax.plot(np.log(q), np.log(c), 'r*')
        ax.plot(np.log(q), np.log(y), 'b*')
Esempio n. 6
0
def readSiteX(siteNo,
              varX,
              area=None,
              nFill=5,
              sd=np.datetime64('1979-01-01'),
              ed=np.datetime64('2020-01-01')):
    """Read daily input variables for one site over [sd, ed].

    Joins gridMET forcing and, when requested, streamflow / runoff onto a
    full daily date index, selects the columns in varX order, and gap-fills
    short NaN runs by interpolation.
    """
    dfX = pd.DataFrame({'date': pd.date_range(sd, ed)}).set_index('date')
    dfF = gridMET.readBasin(siteNo)
    if '00060' in varX or 'runoff' in varX:
        dfQ = usgs.readStreamflow(siteNo, startDate=sd).rename(
            columns={'00060_00003': '00060'})
        if 'runoff' in varX:
            if area is None:
                # fall back to the gageII drainage area for this site
                tab = gageII.readData(varLst=['DRAIN_SQKM'],
                                      siteNoLst=[siteNo])
                area = tab['DRAIN_SQKM'].values[0]
            dfQ['runoff'] = calRunoffArea(dfQ['00060'], area)
        dfX = dfX.join(dfQ)
    dfX = dfX.join(dfF)[varX]
    # fill gaps of at most nFill consecutive days, from both directions
    return dfX.interpolate(limit=nFill, limit_direction='both')
Esempio n. 7
0
def funcPoint(iP, axP):
    """Plot streamflow plus, for each selected code, model predictions
    (with and without Q as input) against observations at one site, with
    a bar marking the train/test split date."""
    siteNo = siteNoLst[iP]
    info1 = wqData.subsetInfo(trainSet)
    info2 = wqData.subsetInfo(testSet)
    ind1 = info1[info1['siteNo'] == siteNo].index
    ind2 = info2[info2['siteNo'] == siteNo].index
    t1 = info1['date'][ind1].values.astype(np.datetime64)
    t2 = info2['date'][ind2].values.astype(np.datetime64)
    # split marker halfway between last train date and first test date
    tBar = t1[-1] + (t2[0] - t1[-1]) / 2
    t = np.concatenate([t1, t2])
    # streamflow panel
    tq = pd.date_range(t[0], t[-1])
    dfQ = pd.DataFrame({'date': tq}).set_index('date').join(
        usgs.readStreamflow(siteNo))
    axplot.plotTS(axP[0],
                  dfQ.index.values, [dfQ['00060_00003'].values],
                  tBar=tBar,
                  styLst=['--b'])
    # one concentration panel per selected code
    for k, code in enumerate(codeSel, start=1):
        ic = wqData.varC.index(code)
        shortName = codePdf.loc[code]['shortName']
        title = '{} {} {}'.format(siteNo, shortName, code)
        xTS = [
            np.concatenate([pLst1[1][ind1, ic], pLst2[1][ind2, ic]]),
            np.concatenate([pLst1[3][ind1, ic], pLst2[3][ind2, ic]]),
            np.concatenate([o1[ind1, ic], o2[ind2, ic]]),
        ]
        axplot.plotTS(axP[k],
                      t,
                      xTS,
                      styLst=['--b', '--r', '*k'],
                      legLst=['w/ Q', 'w/o Q', 'obs'],
                      tBar=tBar)
        axP[k].set_title(title)
Esempio n. 8
0
# Build (or load from cache) a dict of per-site DataFrames that inner-join
# water-quality samples with streamflow on sampling dates.
dirUSGS = os.path.join(kPath.dirData, 'USGS')
dirInv = os.path.join(kPath.dirData, 'USGS', 'inventory')
dirCQ = os.path.join(kPath.dirWQ, 'C-Q')
fileSiteNoLst = os.path.join(dirInv, 'siteNoLst')
siteNoLst = pd.read_csv(fileSiteNoLst, header=None, dtype=str)[0].tolist()

t0 = time.time()
fileName = os.path.join(dirCQ, 'CQall')
if not os.path.exists(fileName):
    dictData = dict()
    errLst = list()  # sites with no sample data at all
    for i, siteNo in enumerate(siteNoLst):
        dfC = usgs.readSample(siteNo, codeLst=waterQuality.codeLst)
        dfQ = usgs.readStreamflow(siteNo)
        if len(dfC.index) == 0:
            errLst.append(siteNo)
        # inner join keeps only dates that have both a sample and a flow
        pdf = pd.concat(
            [dfC.set_index('date').dropna(how='all'),
             dfQ.set_index('date')],
            axis=1,
            join='inner')
        dictData[siteNo] = pdf
        print('\t {}/{} {:.2f}'.format(i, len(siteNoLst),
                                       time.time() - t0),
              end='\r')
    # BUG FIX: the pickle used to be written to kPath.dirWQ/tempData/CQall
    # while the existence check above looked at dirCQ/CQall, so the cache
    # was never reused; write to the same path that is checked and loaded.
    with open(fileName, 'wb') as f:
        pickle.dump(dictData, f)
else:
    with open(fileName, 'rb') as f:
        dictData = pickle.load(f)
Esempio n. 9
0
# read data and merge to: f/q=[nT,nP,nX], g/c=[nP,nY]
# NOTE(review): this is a fragment -- dfP, rho, nFill, startDate, endDate,
# varC and siteNoLst are presumably defined earlier in the full script.
fLst = list()  # forcing ts
gLst = list()  # geo-const
qLst = list()  # streamflow
cLst = list()  # water quality
cfLst = list()  # water quality flags
infoLst = list()
t0 = time.time()
for i, siteNo in enumerate(siteNoLst):
    t1 = time.time()
    # samples plus their QA flags for the requested codes
    dfC, dfCF = usgs.readSample(siteNo,
                                codeLst=varC,
                                startDate=startDate,
                                flag=2)
    dfQ = usgs.readStreamflow(siteNo, startDate=startDate)
    dfF = gridMET.readBasin(siteNo)
    # one training window of length rho ending at each sampling date
    for k in range(len(dfC)):
        ct = dfC.index[k]
        ctR = pd.date_range(ct - pd.Timedelta(days=rho - 1), ct)
        # skip windows that extend outside the study period
        if (ctR[0] < startDate) or (ctR[-1] > endDate):
            continue
        # reindex onto the window and fill short gaps (up to nFill days)
        tempQ = pd.DataFrame({
            'date': ctR
        }).set_index('date').join(dfQ).interpolate(limit=nFill,
                                                   limit_direction='both')
        tempF = pd.DataFrame({
            'date': ctR
        }).set_index('date').join(dfF).join(dfP).interpolate(
            limit=nFill, limit_direction='both')
        qLst.append(tempQ.values)
Esempio n. 10
0
def wrapData(caseName,
             siteNoLst,
             rho=365,
             nFill=5,
             varC=usgs.varC,
             varG=gageII.lstWaterQuality):
    """ wrap up input and target data for the model,as:
    x=[nT,nP,nX]
    y=[nP,nY]
    c=[nP,nC]
    where nP is number of time series
    Arguments:
        caseName {str} -- name of current data case
        siteNoLst {list} -- list of USGS site
    Keyword Arguments:
        rho {int} -- length of each input window in days (default: {365})
        nFill {int} -- max number of continous nan to interpolate in input data (default: {5})
        varC {list} -- list of water quality code to learn (default: {usgs.varC})
        varG {list} -- list of constant variables in gageII (default: {gageII.lstWaterQuality})
        varQ and varF are fixed so far
    """
    # add a start/end date to improve efficiency.
    # pd.datetime was deprecated and removed from pandas; use pd.Timestamp
    startDate = pd.Timestamp(1979, 1, 1)
    endDate = pd.Timestamp(2019, 12, 31)

    # gageII basin attributes
    tabG = gageII.readData(varLst=varG, siteNoLst=siteNoLst)
    tabG = gageII.updateCode(tabG)

    # read data and merge to: f/q=[nT,nP,nX], g/c=[nP,nY]
    fLst = list()  # forcing ts
    gLst = list()  # geo-const
    qLst = list()  # streamflow
    cLst = list()  # water quality
    cfLst = list()  # water quality flags
    infoLst = list()
    t0 = time.time()
    for i, siteNo in enumerate(siteNoLst):
        t1 = time.time()
        # samples plus their QA flags for the requested codes
        dfC, dfCF = usgs.readSample(siteNo,
                                    codeLst=varC,
                                    startDate=startDate,
                                    flag=2)
        dfQ = usgs.readStreamflow(siteNo, startDate=startDate)
        dfF = gridMET.readBasin(siteNo)
        # one rho-day input window ending at each sampling date
        for k in range(len(dfC)):
            ct = dfC.index[k]
            ctR = pd.date_range(ct - pd.Timedelta(days=rho - 1), ct)
            # skip windows that extend outside the study period
            if (ctR[0] < startDate) or (ctR[-1] > endDate):
                continue
            # reindex onto the window and fill short gaps (up to nFill days)
            tempQ = pd.DataFrame({
                'date': ctR
            }).set_index('date').join(dfQ).interpolate(limit=nFill,
                                                       limit_direction='both')
            tempF = pd.DataFrame({
                'date': ctR
            }).set_index('date').join(dfF).interpolate(limit=nFill,
                                                       limit_direction='both')
            qLst.append(tempQ.values)
            fLst.append(tempF.values)
            cLst.append(dfC.iloc[k].values)
            cfLst.append(dfCF.iloc[k].values)
            gLst.append(tabG.loc[siteNo].values)
            infoLst.append(dict(siteNo=siteNo, date=ct))
        t2 = time.time()
        print('{} on site {} reading {:.3f} total {:.3f}'.format(
            i, siteNo, t2 - t1, t2 - t0))
    # stack to [nT, nP, nX] for time series and [nP, nX] for constants
    q = np.stack(qLst, axis=-1).swapaxes(1, 2).astype(np.float32)
    f = np.stack(fLst, axis=-1).swapaxes(1, 2).astype(np.float32)
    g = np.stack(gLst, axis=-1).swapaxes(0, 1).astype(np.float32)
    c = np.stack(cLst, axis=-1).swapaxes(0, 1).astype(np.float32)
    cf = np.stack(cfLst, axis=-1).swapaxes(0, 1).astype(np.float32)
    infoDf = pd.DataFrame(infoLst)
    # add runoff as a second streamflow channel
    runoff = calRunoff(q[:, :, 0], infoDf)
    q = np.stack([q[:, :, 0], runoff], axis=-1).astype(np.float32)
    saveFolder = os.path.join(kPath.dirWQ, 'trainData')
    # make sure the output folder exists before writing
    os.makedirs(saveFolder, exist_ok=True)
    saveName = os.path.join(saveFolder, caseName)
    np.savez(saveName, q=q, f=f, c=c, g=g, cf=cf)
    infoDf.to_csv(saveName + '.csv')
    dictData = dict(name=caseName,
                    rho=rho,
                    nFill=nFill,
                    varG=varG,
                    varC=varC,
                    varQ=['00060', 'runoff'],
                    varF=gridMET.varLst,
                    siteNoLst=siteNoLst)
    with open(saveName + '.json', 'w') as fp:
        json.dump(dictData, fp, indent=4)