def do_lowercheck(cf, ds, section, series, code=2):
    """
    Purpose:
     Apply a lower limit check to a variable over one or more date ranges.
     Data below the (linearly interpolated) lower limit is masked and the
     QC flag is set to the value of "code".
    Usage:
     do_lowercheck(cf, ds, section, series, code=2)
    Author: PRI
    Date: February 2017
    """
    # check to see if LowerCheck requested for this variable
    if "LowerCheck" not in cf[section][series]:
        return
    # check to see if limits have been specified
    if len(cf[section][series]["LowerCheck"].keys()) == 0:
        msg = "do_lowercheck: no date ranges specified"
        logger.info(msg)
        return
    ldt = ds.series["DateTime"]["Data"]
    ts = ds.globalattributes["time_step"]
    data, flag, attr = qcutils.GetSeriesasMA(ds, series)
    lc_list = list(cf[section][series]["LowerCheck"].keys())
    for n, item in enumerate(lc_list):
        # this should be a list and we should probably check for compliance
        lwr_info = cf[section][series]["LowerCheck"][item]
        attr["lowercheck_" + str(n)] = str(lwr_info)
        start_date = dateutil.parser.parse(lwr_info[0])
        su = float(lwr_info[1])
        end_date = dateutil.parser.parse(lwr_info[2])
        eu = float(lwr_info[3])
        # get the start and end indices
        si = qcutils.GetDateIndex(ldt, start_date, ts=ts, default=0, match="exact")
        ei = qcutils.GetDateIndex(ldt, end_date, ts=ts, default=len(ldt) - 1, match="exact")
        # get the segment of data between this start and end date
        seg_data = data[si:ei + 1]
        seg_flag = flag[si:ei + 1]
        x = numpy.arange(si, ei + 1, 1)
        lower = numpy.interp(x, [si, ei], [su, eu])
        index = numpy.ma.where((seg_data < lower))[0]
        seg_data[index] = numpy.ma.masked
        seg_flag[index] = numpy.int32(code)
        data[si:ei + 1] = seg_data
        flag[si:ei + 1] = seg_flag
    # now put the data back into the data structure
    qcutils.CreateSeries(ds, series, data, Flag=flag, Attr=attr)
    return
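# A hypothetical control file entry that would drive do_lowercheck() above.  The
# variable name (Fsd), dates and limits are illustrative only; what the code requires
# is that each entry under [[[LowerCheck]]] is a 4-element list of
# [start_date, lower_limit_at_start, end_date, lower_limit_at_end], with the limit
# interpolated linearly between the two dates and data below the limit masked.
#
# [Variables]
#     [[Fsd]]
#         [[[LowerCheck]]]
#             0 = "2017-01-01 00:30, -10, 2017-06-30 00:00, -10"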
def ImportSeries(cf, ds):
    # check to see if there is an Imports section
    if "Imports" not in cf.keys():
        return
    # number of records
    nRecs = int(ds.globalattributes["nc_nrecs"])
    # get the start and end datetime
    ldt = ds.series["DateTime"]["Data"]
    start_date = ldt[0]
    end_date = ldt[-1]
    # loop over the series in the Imports section
    for label in cf["Imports"].keys():
        import_filename = qcutils.get_keyvaluefromcf(cf, ["Imports", label], "file_name", default="")
        if import_filename == "":
            msg = " ImportSeries: import filename not found in control file, skipping ..."
            logger.warning(msg)
            continue
        var_name = qcutils.get_keyvaluefromcf(cf, ["Imports", label], "var_name", default="")
        if var_name == "":
            msg = " ImportSeries: variable name not found in control file, skipping ..."
            logger.warning(msg)
            continue
        ds_import = qcio.nc_read_series(import_filename)
        ts_import = ds_import.globalattributes["time_step"]
        ldt_import = ds_import.series["DateTime"]["Data"]
        si = qcutils.GetDateIndex(ldt_import, str(start_date), ts=ts_import, default=0, match="exact")
        ei = qcutils.GetDateIndex(ldt_import, str(end_date), ts=ts_import, default=len(ldt_import) - 1, match="exact")
        data = numpy.ma.ones(nRecs) * float(c.missing_value)
        flag = numpy.ma.ones(nRecs)
        data_import, flag_import, attr_import = qcutils.GetSeriesasMA(ds_import, var_name, si=si, ei=ei)
        ldt_import = ldt_import[si:ei + 1]
        index = qcutils.FindIndicesOfBInA(ldt_import, ldt)
        data[index] = data_import
        flag[index] = flag_import
        qcutils.CreateSeries(ds, label, data, flag, attr_import)
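# A hypothetical [Imports] section for ImportSeries() above.  The label ("AWS_Ta"),
# file path and variable name are illustrative only; "file_name" and "var_name" are
# the keys the code looks for, and the imported series is written back to the data
# structure under the label of the control file entry.
#
# [Imports]
#     [[AWS_Ta]]
#         file_name = /path/to/aws_data.nc
#         var_name = Ta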
def gfMDS_make_data_array(ds, current_year, info):
    """
    Purpose:
     Create a data array for the MDS gap filling routine.  The array constructed
     here will be written to a CSV file that is read by the MDS C code.
    Usage:
    Side effects:
     The constructed data arrays are full years.  That is, they run from
     YYYY-01-01 00:30 to YYYY+1-01-01 00:00.  Missing data is represented as -9999.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = ds.globalattributes["nc_nrecs"]
    ts = int(ds.globalattributes["time_step"])
    start = datetime.datetime(current_year, 1, 1, 0, 30, 0)
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array([dt for dt in qcutils.perdelta(start, end, datetime.timedelta(minutes=ts))])
    mt = numpy.ones(len(cdt)) * float(-9999)
    # need an entry for the timestamp and the target ...
    array_list = [cdt, mt]
    # ... and entries for the drivers
    for driver in info["drivers"]:
        array_list.append(mt)
    # now we can create the data array
    data = numpy.stack(array_list, axis=-1)
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    pfp_label_list = [info["target"]] + info["drivers"]
    mds_label_list = [info["target_mds"]] + info["drivers_mds"]
    header = "TIMESTAMP"
    fmt = "%12i"
    for n, label in enumerate(pfp_label_list):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
        header = header + "," + mds_label_list[n]
        fmt = fmt + "," + "%f"
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data, header, fmt
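# Minimal sketch of how gfMDS_make_data_array() might be driven and its output written
# to the CSV file read by the MDS C code.  The label names in "info" are illustrative
# assumptions; only the key names ("target", "target_mds", "drivers", "drivers_mds")
# are taken from the code above.  Writing with numpy.savetxt is also an assumption,
# chosen because the returned header and fmt strings are in savetxt style.
#
# info = {"target": "Fc", "target_mds": "NEE",
#         "drivers": ["Fsd", "Ta", "VPD"], "drivers_mds": ["SW_IN", "TA", "VPD"]}
# data, header, fmt = gfMDS_make_data_array(ds, 2017, info)
# numpy.savetxt("mds_input_2017.csv", data, header=header, delimiter=",",
#               comments="", fmt=fmt.split(","))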
def make_data_array(ds, current_year):
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = ds.globalattributes["nc_nrecs"]
    ts = int(ds.globalattributes["time_step"])
    start = datetime.datetime(current_year, 1, 1, 0, 30, 0)
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array([dt for dt in qcutils.perdelta(start, end, datetime.timedelta(minutes=ts))])
    mt = numpy.ones(len(cdt)) * float(-9999)
    # one column for the timestamp and one for each of the seven variables below
    data = numpy.stack([cdt, mt, mt, mt, mt, mt, mt, mt], axis=-1)
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, idx2 = qcutils.FindMatchingIndices(cdt, dt["Data"])
    for n, label in enumerate(["Fc", "VPD", "ustar", "Ta", "Fsd", "Fh", "Fe"]):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data
def compare_eddypro():
    epname = qcio.get_filename_dialog(title='Choose an EddyPro full output file')
    ofname = qcio.get_filename_dialog(title='Choose an L3 output file')
    ds_ep = qcio.read_eddypro_full(epname)
    ds_of = qcio.nc_read_series(ofname)
    dt_ep = ds_ep.series['DateTime']['Data']
    dt_of = ds_of.series['DateTime']['Data']
    start_datetime = max([dt_ep[0], dt_of[0]])
    end_datetime = min([dt_ep[-1], dt_of[-1]])
    si_of = qcutils.GetDateIndex(dt_of, str(start_datetime), ts=30, default=0, match='exact')
    ei_of = qcutils.GetDateIndex(dt_of, str(end_datetime), ts=30, default=len(dt_of), match='exact')
    si_ep = qcutils.GetDateIndex(dt_ep, str(start_datetime), ts=30, default=0, match='exact')
    ei_ep = qcutils.GetDateIndex(dt_ep, str(end_datetime), ts=30, default=len(dt_ep), match='exact')
    us_of = qcutils.GetVariableAsDictionary(ds_of, 'ustar', si=si_of, ei=ei_of)
    us_ep = qcutils.GetVariableAsDictionary(ds_ep, 'ustar', si=si_ep, ei=ei_ep)
    Fh_of = qcutils.GetVariableAsDictionary(ds_of, 'Fh', si=si_of, ei=ei_of)
    Fh_ep = qcutils.GetVariableAsDictionary(ds_ep, 'Fh', si=si_ep, ei=ei_ep)
    Fe_of = qcutils.GetVariableAsDictionary(ds_of, 'Fe', si=si_of, ei=ei_of)
    Fe_ep = qcutils.GetVariableAsDictionary(ds_ep, 'Fe', si=si_ep, ei=ei_ep)
    Fc_of = qcutils.GetVariableAsDictionary(ds_of, 'Fc', si=si_of, ei=ei_of)
    Fc_ep = qcutils.GetVariableAsDictionary(ds_ep, 'Fc', si=si_ep, ei=ei_ep)
    # copy the range check values from the OFQC attributes to the EP attributes
    for of, ep in zip([us_of, Fh_of, Fe_of, Fc_of], [us_ep, Fh_ep, Fe_ep, Fc_ep]):
        for item in ["rangecheck_upper", "rangecheck_lower"]:
            if item in of["Attr"]:
                ep["Attr"][item] = of["Attr"][item]
    # apply QC to the EddyPro data
    qcck.ApplyRangeCheckToVariable(us_ep)
    qcck.ApplyRangeCheckToVariable(Fc_ep)
    qcck.ApplyRangeCheckToVariable(Fe_ep)
    qcck.ApplyRangeCheckToVariable(Fh_ep)
    # plot the comparison
    plt.ion()
    fig = plt.figure(1, figsize=(8, 8))
    qcplot.xyplot(us_ep["Data"], us_of["Data"], sub=[2, 2, 1], regr=2,
                  xlabel='u*_EP (m/s)', ylabel='u*_OF (m/s)')
    qcplot.xyplot(Fh_ep["Data"], Fh_of["Data"], sub=[2, 2, 2], regr=2,
                  xlabel='Fh_EP (W/m2)', ylabel='Fh_OF (W/m2)')
    qcplot.xyplot(Fe_ep["Data"], Fe_of["Data"], sub=[2, 2, 3], regr=2,
                  xlabel='Fe_EP (W/m2)', ylabel='Fe_OF (W/m2)')
    qcplot.xyplot(Fc_ep["Data"], Fc_of["Data"], sub=[2, 2, 4], regr=2,
                  xlabel='Fc_EP (umol/m2/s)', ylabel='Fc_OF (umol/m2/s)')
    plt.tight_layout()
    plt.draw()
    plt.ioff()
def climatology(cf):
    nc_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(nc_filename):
        return
    xl_filename = nc_filename.replace(".nc", "_Climatology.xls")
    xlFile = xlwt.Workbook()
    ds = qcio.nc_read_series(nc_filename)
    # calculate Fa if it is not in the data structure
    if "Fa" not in ds.series.keys():
        if "Fn" in ds.series.keys() and "Fg" in ds.series.keys():
            qcts.CalculateAvailableEnergy(ds, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
        else:
            log.warning(" Climatology: Fn or Fg not in data structure")
    # get the time step
    ts = int(ds.globalattributes['time_step'])
    # get the site name
    SiteName = ds.globalattributes['site_name']
    # get the datetime series
    dt = ds.series['DateTime']['Data']
    Hdh = ds.series['Hdh']['Data']
    Month = ds.series['Month']['Data']
    # get the initial start and end dates
    StartDate = str(dt[0])
    EndDate = str(dt[-1])
    # find the start index of the first whole day (time=00:30)
    si = qcutils.GetDateIndex(dt, StartDate, ts=ts, default=0, match='startnextday')
    # find the end index of the last whole day (time=00:00)
    ei = qcutils.GetDateIndex(dt, EndDate, ts=ts, default=-1, match='endpreviousday')
    # get local views of the datetime series
    ldt = dt[si:ei + 1]
    Hdh = Hdh[si:ei + 1]
    Month = Month[si:ei + 1]
    # get the number of time steps in a day and the number of days in the data
    ntsInDay = int(24.0 * 60.0 / float(ts))
    # integer division so that reshape() below gets an integer number of days
    nDays = int(len(ldt)) // ntsInDay
    for ThisOne in cf['Variables'].keys():
        if "AltVarName" in cf['Variables'][ThisOne].keys():
            ThisOne = cf['Variables'][ThisOne]["AltVarName"]
        if ThisOne in ds.series.keys():
            log.info(" Doing climatology for " + ThisOne)
            data, f, a = qcutils.GetSeriesasMA(ds, ThisOne, si=si, ei=ei)
            if numpy.ma.count(data) == 0:
                log.warning(" No data for " + ThisOne + ", skipping ...")
                continue
            fmt_str = get_formatstring(cf, ThisOne, fmt_def='')
            xlSheet = xlFile.add_sheet(ThisOne)
            Av_all = do_diurnalstats(Month, Hdh, data, xlSheet, format_string=fmt_str, ts=ts)
            # now do it for each day
            # we want to preserve any data that has been truncated by the use of the "startnextday"
            # and "endpreviousday" match options used above.  Here we revisit the start and end indices
            # and adjust these backwards and forwards respectively if data has been truncated.
            nDays_daily = nDays
            ei_daily = ei
            si_daily = si
            sdate = ldt[0]
            edate = ldt[-1]
            # is there data after the current end date?
            if dt[-1] > ldt[-1]:
                # if so, push the end index back by 1 day so it is included
                ei_daily = ei + ntsInDay
                nDays_daily = nDays_daily + 1
                edate = ldt[-1] + datetime.timedelta(days=1)
            # is there data before the current start date?
            if dt[0] < ldt[0]:
                # if so, push the start index back by 1 day so it is included
                si_daily = si - ntsInDay
                nDays_daily = nDays_daily + 1
                sdate = ldt[0] - datetime.timedelta(days=1)
            # get the data and use the "pad" option to add missing data if required to
            # complete the extra days
            data, f, a = qcutils.GetSeriesasMA(ds, ThisOne, si=si_daily, ei=ei_daily, mode="pad")
            data_daily = data.reshape(nDays_daily, ntsInDay)
            xlSheet = xlFile.add_sheet(ThisOne + '(day)')
            write_data_1columnpertimestep(xlSheet, data_daily, ts, startdate=sdate, format_string=fmt_str)
            data_daily_i = do_2dinterpolation(data_daily)
            xlSheet = xlFile.add_sheet(ThisOne + 'i(day)')
            write_data_1columnpertimestep(xlSheet, data_daily_i, ts, startdate=sdate, format_string=fmt_str)
        elif ThisOne == "EF":
            log.info(" Doing evaporative fraction")
            EF = numpy.ma.zeros([48, 12]) + float(c.missing_value)
            Hdh, f, a = qcutils.GetSeriesasMA(ds, 'Hdh', si=si, ei=ei)
            Fa, f, a = qcutils.GetSeriesasMA(ds, 'Fa', si=si, ei=ei)
            Fe, f, a = qcutils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            for m in range(1, 13):
                mi = numpy.where(Month == m)[0]
                Fa_Num, Hr, Fa_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], Fa[mi], ts)
                Fe_Num, Hr, Fe_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], Fe[mi], ts)
                index = numpy.ma.where((Fa_Num > 4) & (Fe_Num > 4))
                EF[:, m - 1][index] = Fe_Av[index] / Fa_Av[index]
            # reject EF values greater than the upper limit or less than the lower limit
            upr, lwr = get_rangecheck_limit(cf, 'EF')
            EF = numpy.ma.filled(numpy.ma.masked_where((EF > upr) | (EF < lwr), EF), float(c.missing_value))
            # write the EF to the Excel file
            xlSheet = xlFile.add_sheet('EF')
            write_data_1columnpermonth(xlSheet, EF, ts, format_string='0.00')
            # do the 2D interpolation to fill missing EF values
            EFi = do_2dinterpolation(EF)
            xlSheet = xlFile.add_sheet('EFi')
            write_data_1columnpermonth(xlSheet, EFi, ts, format_string='0.00')
            # now do EF for each day
            Fa, f, a = qcutils.GetSeriesasMA(ds, 'Fa', si=si, ei=ei)
            Fe, f, a = qcutils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            EF = Fe / Fa
            EF = numpy.ma.filled(numpy.ma.masked_where((EF > upr) | (EF < lwr), EF), float(c.missing_value))
            EF_daily = EF.reshape(nDays, ntsInDay)
            xlSheet = xlFile.add_sheet('EF(day)')
            write_data_1columnpertimestep(xlSheet, EF_daily, ts, startdate=ldt[0], format_string='0.00')
            EFi = do_2dinterpolation(EF_daily)
            xlSheet = xlFile.add_sheet('EFi(day)')
            write_data_1columnpertimestep(xlSheet, EFi, ts, startdate=ldt[0], format_string='0.00')
        elif ThisOne == "BR":
            log.info(" Doing Bowen ratio")
            BR = numpy.ma.zeros([48, 12]) + float(c.missing_value)
            Fe, f, a = qcutils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            Fh, f, a = qcutils.GetSeriesasMA(ds, 'Fh', si=si, ei=ei)
            for m in range(1, 13):
                mi = numpy.where(Month == m)[0]
                Fh_Num, Hr, Fh_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], Fh[mi], ts)
                Fe_Num, Hr, Fe_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], Fe[mi], ts)
                index = numpy.ma.where((Fh_Num > 4) & (Fe_Num > 4))
                BR[:, m - 1][index] = Fh_Av[index] / Fe_Av[index]
            # reject BR values greater than the upper limit or less than the lower limit
            upr, lwr = get_rangecheck_limit(cf, 'BR')
            BR = numpy.ma.filled(numpy.ma.masked_where((BR > upr) | (BR < lwr), BR), float(c.missing_value))
            # write the BR to the Excel file
            xlSheet = xlFile.add_sheet('BR')
            write_data_1columnpermonth(xlSheet, BR, ts, format_string='0.00')
            # do the 2D interpolation to fill missing BR values
            BRi = do_2dinterpolation(BR)
            xlSheet = xlFile.add_sheet('BRi')
            write_data_1columnpermonth(xlSheet, BRi, ts, format_string='0.00')
            # now do BR for each day ...
            Fe, f, a = qcutils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            Fh, f, a = qcutils.GetSeriesasMA(ds, 'Fh', si=si, ei=ei)
            BR = Fh / Fe
            BR = numpy.ma.filled(numpy.ma.masked_where((BR > upr) | (BR < lwr), BR), float(c.missing_value))
            BR_daily = BR.reshape(nDays, ntsInDay)
            xlSheet = xlFile.add_sheet('BR(day)')
            write_data_1columnpertimestep(xlSheet, BR_daily, ts, startdate=ldt[0], format_string='0.00')
            BRi = do_2dinterpolation(BR_daily)
            xlSheet = xlFile.add_sheet('BRi(day)')
            write_data_1columnpertimestep(xlSheet, BRi, ts, startdate=ldt[0], format_string='0.00')
        elif ThisOne == "WUE":
            log.info(" Doing ecosystem WUE")
            WUE = numpy.ma.zeros([48, 12]) + float(c.missing_value)
            Fe, f, a = qcutils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            Fc, f, a = qcutils.GetSeriesasMA(ds, 'Fc', si=si, ei=ei)
            for m in range(1, 13):
                mi = numpy.where(Month == m)[0]
                Fc_Num, Hr, Fc_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], Fc[mi], ts)
                Fe_Num, Hr, Fe_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], Fe[mi], ts)
                index = numpy.ma.where((Fc_Num > 4) & (Fe_Num > 4))
                WUE[:, m - 1][index] = Fc_Av[index] / Fe_Av[index]
            # reject WUE values greater than the upper limit or less than the lower limit
            upr, lwr = get_rangecheck_limit(cf, 'WUE')
            WUE = numpy.ma.filled(numpy.ma.masked_where((WUE > upr) | (WUE < lwr), WUE), float(c.missing_value))
            # write the WUE to the Excel file
            xlSheet = xlFile.add_sheet('WUE')
            write_data_1columnpermonth(xlSheet, WUE, ts, format_string='0.00000')
            # do the 2D interpolation to fill missing WUE values
            WUEi = do_2dinterpolation(WUE)
            xlSheet = xlFile.add_sheet('WUEi')
            write_data_1columnpermonth(xlSheet, WUEi, ts, format_string='0.00000')
            # now do WUE for each day ...
            Fe, f, a = qcutils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            Fc, f, a = qcutils.GetSeriesasMA(ds, 'Fc', si=si, ei=ei)
            WUE = Fc / Fe
            WUE = numpy.ma.filled(numpy.ma.masked_where((WUE > upr) | (WUE < lwr), WUE), float(c.missing_value))
            WUE_daily = WUE.reshape(nDays, ntsInDay)
            xlSheet = xlFile.add_sheet('WUE(day)')
            write_data_1columnpertimestep(xlSheet, WUE_daily, ts, startdate=ldt[0], format_string='0.00000')
            WUEi = do_2dinterpolation(WUE_daily)
            xlSheet = xlFile.add_sheet('WUEi(day)')
            write_data_1columnpertimestep(xlSheet, WUEi, ts, startdate=ldt[0], format_string='0.00000')
        else:
            log.warning(" qcclim.climatology: requested variable " + ThisOne + " not in data structure")
            continue
    log.info(" Saving Excel file " + xl_filename)
    xlFile.save(xl_filename)
print("Problems found with time step") qcutils.FixTimeStep(ds_aws_30minute) qcutils.get_ymdhmsfromdatetime(ds_aws_30minute) dt_aws_30minute = ds_aws_30minute.series["DateTime"]["Data"] ddt = [ dt_aws_30minute[i + 1] - dt_aws_30minute[i] for i in range(0, len(dt_aws_30minute) - 1) ] print("Minimum time step is", min(ddt), " Maximum time step is", max(ddt)) dt_aws_30minute = ds_aws_30minute.series["DateTime"]["Data"] start_date = dt_aws_30minute[0] end_date = dt_aws_30minute[-1] si_wholehour = qcutils.GetDateIndex(dt_aws_30minute, str(start_date), ts=30, match="startnexthour") ei_wholehour = qcutils.GetDateIndex(dt_aws_30minute, str(end_date), ts=30, match="endprevioushour") start_date = dt_aws_30minute[si_wholehour] end_date = dt_aws_30minute[ei_wholehour] dt_aws_30minute_array = numpy.array(dt_aws_30minute[si_wholehour:ei_wholehour + 1]) nRecs_30minute = len(dt_aws_30minute_array) dt_aws_2d = numpy.reshape(dt_aws_30minute_array, (nRecs_30minute / 2, 2)) dt_aws_60minute = list(dt_aws_2d[:, 1]) nRecs_60minute = len(dt_aws_60minute) series_list = list(ds_aws_30minute.series.keys())
# read the BIOS file
bios_ncfile = netCDF4.Dataset(infilename)
time = bios_ncfile.variables["time"][:]
nRecs = len(time)
# set some global attributes
ts = ds_30.globalattributes["time_step"] = 30
ds_30.globalattributes["time_zone"] = site_timezone
ds_30.globalattributes["nc_nrecs"] = nRecs
ds_30.globalattributes["xl_datemode"] = str(0)
ds_30.globalattributes["site_name"] = cf["Sites"][site]["site_name"]
time_units = getattr(bios_ncfile.variables["time"], "units")
qcutils.get_datetimefromnctime(ds_30, time, time_units)
qcutils.round_datetime(ds_30, mode="nearest_timestep")
if qcutils.CheckTimeStep(ds_30):
    qcutils.FixTimeStep(ds_30)
ldt_30 = ds_30.series["DateTime"]["Data"]
si = qcutils.GetDateIndex(ldt_30, start_date, default=0, ts=ts, match="startnexthour")
ei = qcutils.GetDateIndex(ldt_30, end_date, default=len(ldt_30), ts=ts, match="endprevioushour")
ds_30.series["DateTime"]["Data"] = ds_30.series["DateTime"]["Data"][si:ei + 1]
ds_30.series["DateTime"]["Flag"] = ds_30.series["DateTime"]["Flag"][si:ei + 1]
ldt_30 = ds_30.series["DateTime"]["Data"]
nRecs = ds_30.globalattributes["nc_nrecs"] = len(ldt_30)
flag = numpy.zeros(nRecs)
qcutils.get_ymdhmsfromdatetime(ds_30)
xl_date_loc = qcutils.get_xldatefromdatetime(ds_30)
attr = qcutils.MakeAttributeDictionary(long_name="Date/time (local) in Excel format",
                                       units="days since 1899-12-31 00:00:00")
qcutils.CreateSeries(ds_30, "xlDateTime", xl_date_loc, flag, attr)
# get the data
for label in var_list:
    bios_name = cf["Variables"][label]["bios_name"]
    if len(bios_ncfile.variables[bios_name].shape) == 1:
        #print label+" has 1 dimension"
def get_LL_params(ldt, Fsd, D, T, NEE, ER, LT_results, info):
    # Lasslop as it was written in Lasslop et al (2010), mostly ...
    # Actually, the only intended difference is the window length and offset
    # Lasslop et al used window_length=4, window_offset=2
    mta = numpy.array([])
    LL_results = {"start_date": mta, "mid_date": mta, "end_date": mta,
                  "alpha": mta, "beta": mta, "k": mta, "rb": mta,
                  "alpha_low": mta, "rb_low": mta, "rb_prior": mta, "E0": mta}
    LL_prior = {"rb": 1.0, "alpha": 0.01, "beta": 10, "k": 0}
    LL_fixed = {"D0": 1}
    D0 = LL_fixed["D0"]
    drivers = {}
    start_date = ldt[0]
    last_date = ldt[-1]
    end_date = start_date + datetime.timedelta(days=info["window_length"])
    while end_date <= last_date:
        sub_results = {"RMSE": [], "alpha": [], "beta": [], "k": [], "rb": []}
        si = qcutils.GetDateIndex(ldt, str(start_date), ts=info["ts"])
        ei = qcutils.GetDateIndex(ldt, str(end_date), ts=info["ts"])
        drivers["Fsd"] = numpy.ma.compressed(Fsd[si:ei + 1])
        drivers["D"] = numpy.ma.compressed(D[si:ei + 1])
        drivers["T"] = numpy.ma.compressed(T[si:ei + 1])
        NEEsub = numpy.ma.compressed(NEE[si:ei + 1])
        ERsub = numpy.ma.compressed(ER[si:ei + 1])
        mid_date = start_date + (end_date - start_date) / 2
        # get the value of E0 for the period closest to the mid-point of this period
        diffs = [abs(dt - mid_date) for dt in LT_results["mid_date"]]
        val, idx = min((val, idx) for (idx, val) in enumerate(diffs))
        LL_results["E0"] = numpy.append(LL_results["E0"], LT_results["E0_int"][idx])
        LL_results["start_date"] = numpy.append(LL_results["start_date"], start_date)
        LL_results["mid_date"] = numpy.append(LL_results["mid_date"], mid_date)
        LL_results["end_date"] = numpy.append(LL_results["end_date"], end_date)
        if len(NEEsub) >= 10:
            # alpha and rb from linear fit between NEE and Fsd at low light levels
            idx = numpy.where(drivers["Fsd"] < 100)[0]
            if len(idx) >= 2:
                alpha_low, rb_low = numpy.polyfit(drivers["Fsd"][idx], NEEsub[idx], 1)
            else:
                alpha_low, rb_low = numpy.nan, numpy.nan
            if len(ERsub) >= 10:
                LL_prior["rb"] = numpy.mean(ERsub)
            for bm in [0.5, 1, 2]:
                LL_prior["beta"] = numpy.abs(numpy.percentile(NEEsub, 3) - numpy.percentile(NEEsub, 97))
                LL_prior["beta"] = bm * LL_prior["beta"]
                E0 = LL_results["E0"][-1]
                p0 = [LL_prior["alpha"], LL_prior["beta"], LL_prior["k"], LL_prior["rb"]]
                try:
                    fopt = lambda x, alpha, beta, k, rb: NEE_RHLRC_D(x, alpha, beta, k, D0, rb, E0)
                    popt, pcov = curve_fit(fopt, drivers, NEEsub, p0=p0)
                    alpha, beta, k, rb = popt[0], popt[1], popt[2], popt[3]
                    last_alpha_OK = True
                except RuntimeError:
                    alpha, beta, k, rb = numpy.nan, numpy.nan, numpy.nan, numpy.nan
                    last_alpha_OK = False
                # QC the parameters
                # k first
                if numpy.isnan(k) or k < 0 or k > 2:
                    k = 0
                    try:
                        p0 = [LL_prior["alpha"], LL_prior["beta"], LL_prior["rb"]]
                        fopt = lambda x, alpha, beta, rb: NEE_RHLRC_D(x, alpha, beta, k, D0, rb, E0)
                        popt, pcov = curve_fit(fopt, drivers, NEEsub, p0=p0)
                        alpha, beta, rb = popt[0], popt[1], popt[2]
                        last_alpha_OK = True
                    except RuntimeError:
                        alpha, beta, k, rb = numpy.nan, numpy.nan, numpy.nan, numpy.nan
                        last_alpha_OK = False
                # then alpha
                if numpy.isnan(alpha) or alpha < 0 or alpha > 0.22:
                    if last_alpha_OK == True:
                        alpha = LL_results["alpha"][-1]
                    else:
                        alpha = 0
                    try:
                        p0 = [LL_prior["beta"], LL_prior["k"], LL_prior["rb"]]
                        fopt = lambda x, beta, k, rb: NEE_RHLRC_D(x, alpha, beta, k, D0, rb, E0)
                        popt, pcov = curve_fit(fopt, drivers, NEEsub, p0=p0)
                        beta, k, rb = popt[0], popt[1], popt[2]
                    except RuntimeError:
                        alpha, beta, k, rb = numpy.nan, numpy.nan, numpy.nan, numpy.nan
                # then beta
                if beta < 0:
                    beta = 0
                    try:
                        p0 = [LL_prior["alpha"], LL_prior["k"], LL_prior["rb"]]
                        fopt = lambda x, alpha, k, rb: NEE_RHLRC_D(x, alpha, beta, k, D0, rb, E0)
                        popt, pcov = curve_fit(fopt, drivers, NEEsub, p0=p0)
                        alpha, k, rb = popt[0], popt[1], popt[2]
                    except RuntimeError:
                        alpha, beta, k, rb = numpy.nan, numpy.nan, numpy.nan, numpy.nan
                elif beta > 250:
                    alpha, beta, k, rb = numpy.nan, numpy.nan, numpy.nan, numpy.nan
                # and finally rb
                if rb < 0:
                    alpha, beta, k, rb = numpy.nan, numpy.nan, numpy.nan, numpy.nan
                # now get the RMSE for this set of parameters
                if not numpy.isnan(alpha) and not numpy.isnan(beta) and not numpy.isnan(k) and not numpy.isnan(rb):
                    NEEest = NEE_RHLRC_D(drivers, alpha, beta, k, D0, rb, E0)
                    sub_results["RMSE"].append(numpy.sqrt(numpy.mean((NEEsub - NEEest) ** 2)))
                    sub_results["alpha"].append(alpha)
                    sub_results["beta"].append(beta)
                    sub_results["k"].append(k)
                    sub_results["rb"].append(rb)
            # now find the minimum RMSE and the set of parameters for the minimum
            if len(sub_results["RMSE"]) != 0:
                min_RMSE = min(sub_results["RMSE"])
                idx = sub_results["RMSE"].index(min_RMSE)
                LL_results["alpha"] = numpy.append(LL_results["alpha"], sub_results["alpha"][idx])
                LL_results["alpha_low"] = numpy.append(LL_results["alpha_low"], float(-1) * alpha_low)
                LL_results["rb"] = numpy.append(LL_results["rb"], sub_results["rb"][idx])
                LL_results["rb_low"] = numpy.append(LL_results["rb_low"], rb_low)
                LL_results["rb_prior"] = numpy.append(LL_results["rb_prior"], LL_prior["rb"])
                LL_results["beta"] = numpy.append(LL_results["beta"], sub_results["beta"][idx])
                LL_results["k"] = numpy.append(LL_results["k"], sub_results["k"][idx])
            else:
                LL_results["alpha"] = numpy.append(LL_results["alpha"], numpy.nan)
                LL_results["alpha_low"] = numpy.append(LL_results["alpha_low"], float(-1) * alpha_low)
                LL_results["rb"] = numpy.append(LL_results["rb"], numpy.nan)
                LL_results["rb_low"] = numpy.append(LL_results["rb_low"], rb_low)
                LL_results["rb_prior"] = numpy.append(LL_results["rb_prior"], LL_prior["rb"])
                LL_results["beta"] = numpy.append(LL_results["beta"], numpy.nan)
                LL_results["k"] = numpy.append(LL_results["k"], numpy.nan)
        else:
            LL_results["alpha"] = numpy.append(LL_results["alpha"], numpy.nan)
            LL_results["alpha_low"] = numpy.append(LL_results["alpha_low"], numpy.nan)
            LL_results["rb"] = numpy.append(LL_results["rb"], numpy.nan)
            LL_results["rb_low"] = numpy.append(LL_results["rb_low"], numpy.nan)
            LL_results["rb_prior"] = numpy.append(LL_results["rb_prior"], LL_prior["rb"])
            LL_results["beta"] = numpy.append(LL_results["beta"], numpy.nan)
            LL_results["k"] = numpy.append(LL_results["k"], numpy.nan)
        # update the start and end datetimes
        start_date = start_date + datetime.timedelta(days=info["window_offset"])
        end_date = start_date + datetime.timedelta(days=info["window_length"])
    LL_results["D0"] = D0
    return LL_results
def get_LT_params(ldt, ER, T, info, mode="verbose"):
    """
    Purpose:
     Returns rb and E0 for the Lloyd & Taylor respiration function.
    Usage:
    Author: PRI
    Date: April 2016
    """
    mta = numpy.array([])
    LT_results = {"start_date": mta, "mid_date": mta, "end_date": mta,
                  "rb": mta, "E0": mta, "rb_prior": mta, "E0_prior": mta}
    missed_dates = {"start_date": [], "end_date": []}
    LT_prior = {"rb": 1.0, "E0": 100}
    start_date = ldt[0]
    last_date = ldt[-1]
    end_date = start_date + datetime.timedelta(days=info["window_length"])
    last_E0_OK = False
    while end_date <= last_date:
        LT_results["start_date"] = numpy.append(LT_results["start_date"], start_date)
        LT_results["mid_date"] = numpy.append(LT_results["mid_date"], start_date + (end_date - start_date) / 2)
        LT_results["end_date"] = numpy.append(LT_results["end_date"], end_date)
        si = qcutils.GetDateIndex(ldt, str(start_date), ts=info["ts"])
        ei = qcutils.GetDateIndex(ldt, str(end_date), ts=info["ts"])
        Tsub = numpy.ma.compressed(T[si:ei + 1])
        ERsub = numpy.ma.compressed(ER[si:ei + 1])
        if len(ERsub) >= 10:
            LT_prior["rb"] = numpy.mean(ERsub)
            p0 = [LT_prior["rb"], LT_prior["E0"]]
            try:
                popt, pcov = curve_fit(ER_LloydTaylor, Tsub, ERsub, p0=p0)
            except RuntimeError:
                missed_dates["start_date"].append(start_date)
                missed_dates["end_date"].append(end_date)
            # QC E0 results
            if popt[1] < 50 or popt[1] > 400:
                if last_E0_OK:
                    popt[1] = LT_results["E0"][-1]
                    last_E0_OK = False
                else:
                    if popt[1] < 50:
                        popt[1] = float(50)
                    if popt[1] > 400:
                        popt[1] = float(400)
                    last_E0_OK = False
                # now recalculate rb
                p0 = LT_prior["rb"]
                if numpy.isnan(popt[1]):
                    popt[1] = float(50)
                E0 = numpy.ones(len(Tsub)) * float(popt[1])
                popt1, pcov1 = curve_fit(ER_LloydTaylor_fixedE0, [Tsub, E0], ERsub, p0=p0)
                popt[0] = popt1[0]
            else:
                last_E0_OK = True
            # QC rb results
            if popt[0] < 0:
                popt[0] = float(0)
            LT_results["rb"] = numpy.append(LT_results["rb"], popt[0])
            LT_results["E0"] = numpy.append(LT_results["E0"], popt[1])
            LT_results["rb_prior"] = numpy.append(LT_results["rb_prior"], numpy.mean(ERsub))
            LT_results["E0_prior"] = numpy.append(LT_results["E0_prior"], LT_prior["E0"])
        else:
            LT_results["rb"] = numpy.append(LT_results["rb"], numpy.nan)
            LT_results["E0"] = numpy.append(LT_results["E0"], numpy.nan)
            LT_results["rb_prior"] = numpy.append(LT_results["rb_prior"], numpy.nan)
            LT_results["E0_prior"] = numpy.append(LT_results["E0_prior"], numpy.nan)
        start_date = start_date + datetime.timedelta(days=info["window_offset"])
        end_date = start_date + datetime.timedelta(days=info["window_length"])
        #start_date = end_date
        #end_date = start_date+dateutil.relativedelta.relativedelta(years=1)
    if mode == "verbose":
        if len(missed_dates["start_date"]) != 0:
            msg = " No solution found for the following dates:"
            logger.warning(msg)
            for sd, ed in zip(missed_dates["start_date"], missed_dates["end_date"]):
                msg = " " + str(sd) + " to " + str(ed)
                logger.warning(msg)
    return LT_results
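# Note on the functions fitted above: ER_LloydTaylor and ER_LloydTaylor_fixedE0 are
# defined elsewhere in this module.  As a hedged sketch, they are assumed to implement
# the Lloyd & Taylor (1994) respiration model,
#     ER = rb*exp(E0*(1/(Tref - T0) - 1/(T - T0)))
# with rb and E0 the fitted parameters (E0 held fixed in the second form).  A minimal,
# illustrative call, assuming only the "info" key names used in the code above; the
# values shown are placeholders, not recommended settings:
#
# info = {"window_length": 15, "window_offset": 5, "ts": 30}
# LT_results = get_LT_params(ldt, ER, T, info)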
def gfMDS_get_mds_output(ds, mds_label, out_file_path, include_qc=False):
    """
    Purpose:
     Reads the CSV file output by the MDS C code and puts the contents into
     the data structure.
    Usage:
     gfMDS_get_mds_output(ds, mds_label, out_file_path, include_qc=False)
     where ds is a data structure
           mds_label is the label of this MDS entry in ds.mds
           out_file_path is the full path to the MDS output file
           include_qc controls treatment of the MDS QC output
              True = include QC output
              False = do not include QC output
    Side effects:
     New series are created in the data structure to hold the MDS data.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    first_date = ldt["Data"][0]
    last_date = ldt["Data"][-1]
    data_mds = numpy.genfromtxt(out_file_path, delimiter=",", names=True, autostrip=True, dtype=None)
    dt_mds = numpy.array([dateutil.parser.parse(str(dt)) for dt in data_mds["TIMESTAMP"]])
    si_mds = qcutils.GetDateIndex(dt_mds, first_date)
    ei_mds = qcutils.GetDateIndex(dt_mds, last_date)
    # get a list of the names in the data array
    mds_output_names = list(data_mds.dtype.names)
    # strip out the timestamp and the original data
    for item in ["TIMESTAMP", ds.mds[mds_label]["target_mds"]]:
        if item in mds_output_names:
            mds_output_names.remove(item)
    # check to see if the QC outputs have been requested
    if not include_qc:
        # if not, then remove them from the list of requested outputs
        for item in ["QC", "HAT", "SAMPLE", "STDDEV", "METHOD", "QC_HAT"]:
            if item in mds_output_names:
                mds_output_names.remove(item)
    # and now loop over the MDS output series
    for mds_output_name in mds_output_names:
        if mds_output_name == "FILLED":
            # get the gap filled target and write it to the data structure
            var_in = qcutils.GetVariable(ds, ds.mds[mds_label]["target"])
            data = data_mds[mds_output_name][si_mds:ei_mds + 1]
            idx = numpy.where((numpy.ma.getmaskarray(var_in["Data"]) == True) &
                              (abs(data - c.missing_value) > c.eps))[0]
            flag = numpy.array(var_in["Flag"])
            flag[idx] = numpy.int32(40)
            attr = copy.deepcopy(var_in["Attr"])
            attr["long_name"] = attr["long_name"] + ", gap filled using MDS"
            var_out = {"Label": mds_label, "Data": data, "Flag": flag, "Attr": attr}
            qcutils.CreateVariable(ds, var_out)
        elif mds_output_name == "TIMEWINDOW":
            # make the series name for the data structure
            mds_qc_label = "MDS" + "_" + ds.mds[mds_label]["target"] + "_" + mds_output_name
            data = data_mds[mds_output_name][si_mds:ei_mds + 1]
            flag = numpy.zeros(len(data))
            attr = {"long_name": "TIMEWINDOW from MDS gap filling for " + ds.mds[mds_label]["target"]}
            var_out = {"Label": mds_qc_label, "Data": data, "Flag": flag, "Attr": attr}
            qcutils.CreateVariable(ds, var_out)
        else:
            # make the series name for the data structure
            mds_qc_label = "MDS" + "_" + ds.mds[mds_label]["target"] + "_" + mds_output_name
            data = data_mds[mds_output_name][si_mds:ei_mds + 1]
            flag = numpy.zeros(len(data))
            attr = {"long_name": "QC field from MDS gap filling for " + ds.mds[mds_label]["target"]}
            var_out = {"Label": mds_qc_label, "Data": data, "Flag": flag, "Attr": attr}
            qcutils.CreateVariable(ds, var_out)
    return
def copy_datastructure(cf, ds_in):
    '''
    Return a copy of a data structure based on the following rules:
     1) if the netCDF file at the "copy_to" level does not exist
        then copy the existing data structure at the "input" level
        to create a new data structure at the "output" level.
    '''
    # assumptions that need to be checked are:
    #  - the start datetime of the two sets of data are the same
    #  - the end datetime of the L3 data is the same as or after the
    #    end datetime of the L4 data
    #    - if the end datetimes are the same then we are just re-processing something
    #    - if the end datetime for the L3 data is after the end date of the L4 data
    #      then more data has been added to this year and the user wants to gap fill
    #      the new data
    # modifications to be made:
    #  - check the modification datetime of the L3 and L4 files:
    #    - if the L3 file is newer than the L4 file then disregard the "UseExistingOutFile" setting
    # get the output (L4) file name
    ct_filename = cf['Files']['file_path'] + cf['Files']['out_filename']
    # if the L4 file does not exist then create the L4 data structure as a copy
    # of the L3 data structure
    if not os.path.exists(ct_filename):
        ds_out = copy.deepcopy(ds_in)
    # if the L4 file does exist ...
    if os.path.exists(ct_filename):
        # check to see if the user wants to use it
        if cf['Options']['UseExistingOutFile'] != 'Yes':
            # if the user doesn't want to use the existing L4 data then create
            # the L4 data structure as a copy of the L3 data structure
            ds_out = copy.deepcopy(ds_in)
        else:
            # the user wants to use the data from an existing L4 file
            # get the netCDF file name at the "input" level
            outfilename = get_outfilename_from_cf(cf)
            # read the netCDF file at the "input" level
            ds_file = nc_read_series(outfilename)
            dt_file = ds_file.series['DateTime']['Data']
            sd_file = str(dt_file[0])
            ed_file = str(dt_file[-1])
            # create a copy of the data
            ds_out = copy.deepcopy(ds_in)
            dt_out = ds_out.series['DateTime']['Data']
            ts = ds_out.globalattributes['time_step']
            sd_out = str(dt_out[0])
            ed_out = str(dt_out[-1])
            # get the start and end indices based on the start and end dates
            si = qcutils.GetDateIndex(dt_out, sd_file, ts=ts, default=0, match='exact')
            ei = qcutils.GetDateIndex(dt_out, ed_file, ts=ts, default=-1, match='exact')
            # now replace parts of ds_out with the data read from file
            for ThisOne in ds_file.series.keys():
                # check to see if the L4 series exists in the L3 data
                if ThisOne in ds_out.series.keys():
                    # ds_out is the copy of the L3 data, now fill it with the L4 data read from file
                    ds_out.series[ThisOne]['Data'][si:ei + 1] = ds_file.series[ThisOne]['Data']
                    ds_out.series[ThisOne]['Flag'][si:ei + 1] = ds_file.series[ThisOne]['Flag']
                else:
                    # if it doesn't, create the series and put the data into it
                    ds_out.series[ThisOne] = {}
                    ds_out.series[ThisOne] = ds_file.series[ThisOne].copy()
                    # check to see if we have to append data to make the copy of the L4 data now
                    # in the L3 data structure the same length as the existing L3 data
                    nRecs_file = int(ds_file.globalattributes['nc_nrecs'])
                    nRecs_out = int(ds_out.globalattributes['nc_nrecs'])
                    if nRecs_file < nRecs_out:
                        # there is more data at L3 than at L4
                        # append missing data to make the series the same length
                        nRecs_append = nRecs_out - nRecs_file
                        data = numpy.array([-9999] * nRecs_append, dtype=numpy.float64)
                        flag = numpy.ones(nRecs_append, dtype=numpy.int32)
                        ds_out.series[ThisOne]['Data'] = numpy.concatenate((ds_out.series[ThisOne]['Data'], data))
                        ds_out.series[ThisOne]['Flag'] = numpy.concatenate((ds_out.series[ThisOne]['Flag'], flag))
                    elif nRecs_file > nRecs_out:
                        # tell the user something is wrong
                        log.error('copy_datastructure: L3 file contains less data than L4 file')
                        # return an empty dictionary
                        ds_out = {}
                    else:
                        # nRecs_file and nRecs_out are equal so we do not need to do anything
                        pass
    return ds_out
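# Hedged usage sketch for copy_datastructure() above, assuming "cf" is an L4 control
# file and "ds_l3" the L3 data structure read from the input netCDF file; the variable
# names are illustrative only.
#
# ds_l4 = copy_datastructure(cf, ds_l3)
# if not ds_l4:
#     # an empty dictionary means the L3 file contained less data than the L4 file
#     pass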
# read the input file and return the data structure
ds = qcio.nc_read_series(fname)
if len(ds.series.keys()) == 0:
    print(time.strftime('%X') + ' netCDF file ' + fname + ' not found')
    sys.exit()
# get the site name
SiteName = ds.globalattributes['site_name']
# get the time step
ts = int(ds.globalattributes['time_step'])
# get the datetime series
DateTime = ds.series['DateTime']['Data']
# get the initial start and end dates
# find the start index of the first whole day (time=00:30)
si = qcutils.GetDateIndex(DateTime, str(DateTime[0]), ts=ts, default=0, match='startnextday')
# find the end index of the last whole day (time=00:00)
ei = qcutils.GetDateIndex(DateTime, str(DateTime[-1]), ts=ts, default=-1, match='endpreviousday')
# clip the datetime series to a whole number of days
DateTime = DateTime[si:ei + 1]
StartDate = DateTime[0]
EndDate = DateTime[-1]
print(time.strftime('%X') + ' Start date; ' + str(StartDate) + ' End date; ' + str(EndDate))
Hdh = ds.series['Hdh']['Data'][si:ei + 1]