Exemple #1
0
# Build silica ('00955') subsets on the 'Silica64Seq' dataset and submit a
# GPU training job for the subset labelled Y8090.
#
# One-off creation of 'Silica64Seq' from the 'Silica64' site list, kept here
# for reference only (disabled):
#     if not waterQuality.exist('Silica64Seq'):
#         wqData = waterQuality2.DataModelWQ.new('Silica64Seq', siteNoLst)
#     importlib.reload(waterQuality2)
temp = waterQuality.DataModelWQ('Silica64')
siteNoLst = temp.siteNoLst

wqData = waterQuality2.DataModelWQ('Silica64Seq')

# Keep only the samples with a silica value in the last slice along the
# first axis of q (presumably the last time step -- confirm).
code = '00955'
ic = wqData.varQ.index(code)
hasCode = ~np.isnan(wqData.q[-1, :, ic])
indC = np.where(hasCode)[0]
wqData.saveSubset(code, indC)

# Split those samples by year range; a single range is requested per call,
# so only the first entry of indYr's result is used.
infoC = wqData.info.iloc[indC]
indYr1 = waterQuality.indYr(infoC, yrLst=[1979, 2000])[0]
wqData.saveSubset(code + '-Y8090', indYr1)
indYr2 = waterQuality.indYr(infoC, yrLst=[2000, 2020])[0]
wqData.saveSubset(code + '-Y0010', indYr2)

# Set up the training case on the Y8090 subset.
saveName = 'Silica64Seq-Y8090'
caseName = basins.wrapMaster(
    dataName='Silica64Seq',
    trainName='00955-Y8090',
    batchSize=[None, 200],
    varY=['00060', '00955'],
    varYC=None,
    outName=saveName)

# The value returned by wrapMaster is used both as the job name and as the
# -M argument of the training command.
cmdP = 'python /home/users/kuaifang/GitHUB/geolearn/app/waterQual/model/cmdTrain.py -M {}'
slurm.submitJobGPU(caseName, cmdP.format(caseName), nH=6)
Exemple #2
0
# Drop samples with NaN in q (slice 0 of the last axis), then save site-rank
# and year subsets built from the remaining samples.
q = wqData.q[:, :, 0]
info = wqData.info
nanQ = np.isnan(q)
allNan = nanQ.all(axis=0)  # sample is NaN along the whole first axis
anyNan = nanQ.any(axis=0)  # sample has at least one NaN
# Bare expressions: only useful when run interactively to inspect counts.
len(np.where(allNan)[0])
len(np.where(anyNan)[0])
len(wqData.info)  # nan in Q - 3% all nan, 7% any nan
indR = np.where(anyNan)[0]   # samples removed
indK = np.where(~anyNan)[0]  # samples kept (no NaN at all)

# purify q: work only on the NaN-free samples from here on
infoK = info.iloc[indK]
dfSite = waterQuality.countSite(infoK)
# NOTE(review): pRank is presumably a per-site percentile rank -- confirm
# against countSite. Both comparisons are kept explicit so that NaN ranks
# fall in neither half.
lowRank = dfSite['pRank'] <= 0.5
highRank = dfSite['pRank'] > 0.5
ind1 = dfSite[lowRank].index.values
ind2 = dfSite[highRank].index.values
wqData.saveSubset(['pQ-F50', 'pQ-L50'], [ind1, ind2])

# year subsets after purifying q; indYr is expected to return one index
# array per label in yrLst (default year ranges of indYr -- confirm)
yrLst = ['Y80', 'Y90', 'Y00', 'Y10']
indYr = waterQuality.indYr(infoK)
wqData.saveSubset(['pQ-' + x for x in yrLst], indYr)
# complement of each year subset within the kept samples
indAll = infoK.index.values
indYrCmp = [np.setdiff1d(indAll, ind) for ind in indYr]
wqData.saveSubset(['pQ-rm' + x for x in yrLst], indYrCmp)
# validate
# d=wqData.info.iloc[wqData.subset['pQ-Y00']]['date']
# np.sort(pd.DatetimeIndex(d).year.unique())

# regional / area subsets
varG = ['DRAIN_SQKM', 'ECO2_BAS_DOM', 'NUTR_BAS_DOM', 'HLR_BAS_DOM_100M']
tabG = gageII.readData(varLst=varG, siteNoLst=wqData.siteNoLst)
info = wqData.info
Exemple #3
0
# Steps to run; only the subset-building step is enabled.
doLst = ['subset']

if 'subset' in doLst:
    # Find the samples that have SiO4 / NO3 observations and save subsets
    # for the [1979, 2000] year range (label Y8090) and its complement.
    codeLst = ['00618', '00955']
    icLst = [wqData.varC.index(code) for code in codeLst]
    yrRange = [1979, 2000]

    # Negate isnan BEFORE reducing: `~np.isnan(x).all(axis=1)` parses as
    # `~(np.isnan(x).all(axis=1))`, which previously made indAll hold the
    # "any code observed" rows and indAny the "all codes observed" rows.
    # NOTE: '*-all-*' / '*-any-*' subsets saved by the old code have the
    # swapped meaning and need to be regenerated.
    observed = ~np.isnan(wqData.c[:, icLst])
    indAll = np.where(observed.all(axis=1))[0]  # every code observed
    indAny = np.where(observed.any(axis=1))[0]  # at least one code observed

    # separate index by years, for the combined-code selections
    for ind, lab in zip([indAll, indAny], ['all', 'any']):
        # a single year range is requested, so take the first result
        indYr = waterQuality.indYr(wqData.info.iloc[ind], yrLst=yrRange)[0]
        indYrCmp = np.setdiff1d(ind, indYr)  # samples outside the range
        wqData.saveSubset('-'.join(sorted(codeLst) + [lab, 'Y8090']), indYr)
        wqData.saveSubset(
            '-'.join(sorted(codeLst) + [lab, 'rmY8090']), indYrCmp)

    # same split for each code on its own
    for code in codeLst:
        ic = wqData.varC.index(code)
        indC = np.where(~np.isnan(wqData.c[:, ic]))[0]
        indYr = waterQuality.indYr(wqData.info.iloc[indC], yrLst=yrRange)[0]
        indYrCmp = np.setdiff1d(indC, indYr)
        wqData.saveSubset(code + '-Y8090', indYr)
        wqData.saveSubset(code + '-rmY8090', indYrCmp)

    # manual validation, e.g.:
    # d=wqData.info.iloc[wqData.subset['00618-00955-any-Y8090']]['date']
    # np.sort(pd.DatetimeIndex(d).year.unique())
    # ind=wqData.info.iloc[wqData.subset['00618-00955-any-Y8090']].index.values
    # wqData.c[ind, wqData.varC.index('00618')]