def ImportSeries(cf, ds):
    """
    Purpose:
     Copy the variables listed in the [Imports] section of the control file
     from the files they name into the data structure.
    Usage:
     ImportSeries(cf, ds)
     where cf is a control file object
           ds is a data structure
    Side effects:
     For every [Imports] entry that has both a "file_name" and a "var_name",
     a series named after the entry is created in ds.  Entries missing
     either key are skipped with a warning.  Returns None.
    """
    # nothing to do when the control file has no Imports section
    if "Imports" not in cf.keys():
        return
    n_records = int(ds.globalattributes["nc_nrecs"])
    # first and last time stamps of the data structure being filled
    target_times = ds.series["DateTime"]["Data"]
    first_time = target_times[0]
    last_time = target_times[-1]
    for label in cf["Imports"].keys():
        source_file = pfp_utils.get_keyvaluefromcf(cf, ["Imports", label], "file_name", default="")
        if source_file == "":
            logger.warning(" ImportSeries: import filename not found in control file, skipping ...")
            continue
        source_name = pfp_utils.get_keyvaluefromcf(cf, ["Imports", label], "var_name", default="")
        if source_name == "":
            logger.warning(" ImportSeries: variable name not found in control file, skipping ...")
            continue
        # read the source file and locate the period covered by ds within it
        ds_source = pfp_io.nc_read_series(source_file)
        source_ts = ds_source.globalattributes["time_step"]
        source_times = ds_source.series["DateTime"]["Data"]
        si = pfp_utils.GetDateIndex(source_times, str(first_time), ts=source_ts,
                                    default=0, match="exact")
        ei = pfp_utils.GetDateIndex(source_times, str(last_time), ts=source_ts,
                                    default=len(source_times) - 1, match="exact")
        # start from a series that is missing everywhere with a flag of 1 ...
        data = numpy.ma.ones(n_records) * float(c.missing_value)
        flag = numpy.ma.ones(n_records)
        # ... and overwrite the records whose time stamps occur in the source
        source_data, source_flag, source_attr = pfp_utils.GetSeriesasMA(
            ds_source, source_name, si=si, ei=ei)
        source_times = source_times[si:ei + 1]
        index = pfp_utils.FindIndicesOfBInA(source_times, target_times)
        data[index] = source_data
        flag[index] = source_flag
        pfp_utils.CreateSeries(ds, label, data, flag, source_attr)
def make_data_array(cf, ds, current_year):
    """
    Purpose:
     Build a full-year array with a timestamp column followed by one column
     for each entry in the [Variables] section of the control file.
    Usage:
     data = make_data_array(cf, ds, current_year)
     where cf is a control file object
           ds is a data structure
           current_year is the year (integer) to build the array for
    Side effects:
     None on ds.  The array runs from one time step after YYYY-01-01 00:00
     up to YYYY+1-01-01 00:00 (as generated by pfp_utils.perdelta).
     Records not present in ds are left as -9999.  Column 0 holds the
     timestamp as an integer of the form YYYYMMDDhhmm.
    """
    all_times = pfp_utils.GetVariable(ds, "DateTime")
    n_records = int(ds.globalattributes["nc_nrecs"])
    time_step = int(ds.globalattributes["time_step"])
    step = datetime.timedelta(minutes=time_step)
    year_start = datetime.datetime(current_year, 1, 1, 0, 0, 0) + step
    year_end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    year_times = numpy.array(list(pfp_utils.perdelta(year_start, year_end, step)))
    # every data column starts out as missing
    missing = numpy.ones(len(year_times)) * float(-9999)
    cf_labels = list(cf["Variables"].keys())
    data = numpy.stack([year_times] + [missing] * len(cf_labels), axis=-1)
    # the part of the data structure that falls in this year
    si = pfp_utils.GetDateIndex(all_times["Data"], year_start, default=0)
    ei = pfp_utils.GetDateIndex(all_times["Data"], year_end, default=n_records)
    ds_times = pfp_utils.GetVariable(ds, "DateTime", start=si, end=ei)
    rows, _ = pfp_utils.FindMatchingIndices(year_times, ds_times["Data"])
    for column, cf_label in enumerate(cf_labels, start=1):
        ds_label = cf["Variables"][cf_label]["name"]
        variable = pfp_utils.GetVariable(ds, ds_label, start=si, end=ei)
        data[rows, column] = variable["Data"]
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(when.strftime("%Y%m%d%H%M")) for when in year_times])
    return data
def gfMDS_get_mds_output(ds, mds_label, out_file_path, l5_info, called_by):
    """
    Purpose:
     Reads the CSV file output by the MDS C code and puts the contents into
     the data structure.
    Usage:
     gfMDS_get_mds_output(ds, mds_label, out_file_path, l5_info, called_by)
     where ds is a data structure
           mds_label is the label of the MDS output being processed
           out_file_path is the full path to the MDS output file
           l5_info is the information dictionary
           called_by is the key of the calling routine's entry in l5_info
    Side effects:
     New series are created in the data structure to hold the MDS data:
      - the "FILLED" column is written as mds_label; records that are masked
        in the target and not missing in the MDS output get a flag of 40
      - every other column (except TIMESTAMP and the original target) is
        written as "MDS_<target>_<column name>" with a flag of zero.
    Author: PRI
    Date: May 2018
    """
    ds_times = pfp_utils.GetVariable(ds, "DateTime")
    first_date = ds_times["Data"][0]
    last_date = ds_times["Data"][-1]
    data_mds = numpy.genfromtxt(out_file_path, delimiter=",", names=True,
                                autostrip=True, dtype=None)
    # locate the period covered by the data structure in the MDS output
    dt_mds = numpy.array([dateutil.parser.parse(str(stamp))
                          for stamp in data_mds["TIMESTAMP"]])
    si_mds = pfp_utils.GetDateIndex(dt_mds, first_date)
    ei_mds = pfp_utils.GetDateIndex(dt_mds, last_date)
    rows = slice(si_mds, ei_mds + 1)
    # the columns to copy are everything except the timestamp and the
    # original (unfilled) target
    outputs = l5_info[called_by]["outputs"][mds_label]
    column_names = list(data_mds.dtype.names)
    for unwanted in ["TIMESTAMP", outputs["target_mds"]]:
        if unwanted in column_names:
            column_names.remove(unwanted)
    for column_name in column_names:
        if column_name == "FILLED":
            # the gap filled target: flag the records that MDS filled
            target = pfp_utils.GetVariable(ds, outputs["target"])
            data = data_mds[column_name][rows]
            was_masked = numpy.ma.getmaskarray(target["Data"]) == True
            not_missing = abs(data - c.missing_value) > c.eps
            filled = numpy.where(was_masked & not_missing)[0]
            flag = numpy.array(target["Flag"])
            flag[filled] = numpy.int32(40)
            attr = copy.deepcopy(target["Attr"])
            attr["long_name"] = attr["long_name"] + ", gap filled using MDS"
            label = mds_label
        else:
            # TIMEWINDOW and the QC fields differ only in their description
            if column_name == "TIMEWINDOW":
                description = "TIMEWINDOW from MDS gap filling for "
            else:
                description = "QC field from MDS gap filling for "
            label = "MDS" + "_" + outputs["target"] + "_" + column_name
            data = data_mds[column_name][rows]
            flag = numpy.zeros(len(data))
            attr = {"long_name": description + outputs["target"]}
        pfp_utils.CreateVariable(ds, {"Label": label, "Data": data,
                                      "Flag": flag, "Attr": attr})
    return
def do_lowercheck(cf, ds, section, series, code=2):
    """
    Purpose:
     Mask the values of a series that fall below a lower limit which varies
     linearly (by record index) between a start date and an end date.
    Usage:
     do_lowercheck(cf, ds, section, series, code=2)
     where cf is a control file object
           ds is a data structure
           section is the control file section holding the series entry
           series is the label of the series to be checked
           code is the flag value given to rejected records
     Each entry in cf[section][series]["LowerCheck"] is indexed as
     [start date, limit at start, end date, limit at end].
    Side effects:
     The series is rewritten in ds with rejected records masked and flagged,
     and a "lowercheck_<n>" attribute is added for each entry applied.
     Nothing is done if there is no LowerCheck entry or it is empty.
    Author: PRI
    Date: February 2017
    """
    # check to see if LowerCheck requested for this variable
    if "LowerCheck" not in cf[section][series]:
        return
    limits = cf[section][series]["LowerCheck"]
    # check to see if limits have been specified
    if len(limits.keys()) == 0:
        logger.info("do_lowercheck: no date ranges specified")
        return
    times = ds.series["DateTime"]["Data"]
    time_step = ds.globalattributes["time_step"]
    data, flag, attr = pfp_utils.GetSeriesasMA(ds, series)
    for count, key in enumerate(list(limits.keys())):
        # this should be a list and we should probably check for compliance
        entry = limits[key]
        attr["lowercheck_" + str(count)] = str(entry)
        first_date = dateutil.parser.parse(entry[0])
        first_limit = float(entry[1])
        last_date = dateutil.parser.parse(entry[2])
        last_limit = float(entry[3])
        # records covered by this entry
        si = pfp_utils.GetDateIndex(times, first_date, ts=time_step,
                                    default=0, match="exact")
        ei = pfp_utils.GetDateIndex(times, last_date, ts=time_step,
                                    default=len(times) - 1, match="exact")
        window = slice(si, ei + 1)
        seg_data = data[window]
        seg_flag = flag[window]
        # lower limit interpolated linearly across the window
        positions = numpy.arange(si, ei + 1, 1)
        lower = numpy.interp(positions, [si, ei], [first_limit, last_limit])
        rejected = numpy.ma.where(seg_data < lower)[0]
        seg_data[rejected] = numpy.ma.masked
        seg_flag[rejected] = numpy.int32(code)
        data[window] = seg_data
        flag[window] = seg_flag
    # now put the data back into the data structure
    pfp_utils.CreateSeries(ds, series, data, Flag=flag, Attr=attr)
    return
def compare_eddypro():
    """
    Purpose:
     Plot ustar, Fh, Fe and Fc from an EddyPro full output file against the
     same variables from an L3 output file.
    Usage:
     compare_eddypro()
     The two files are chosen interactively via file dialogs.
    Side effects:
     The range check limits found in the L3 variable attributes are copied
     to the EddyPro variables and applied to them before plotting.  A 2x2
     figure of scatter plots (EddyPro on x, L3 on y) is drawn.
    """
    epname = pfp_io.get_filename_dialog(title='Choose an EddyPro full output file')
    ofname = pfp_io.get_filename_dialog(title='Choose an L3 output file')
    ds_ep = pfp_io.read_eddypro_full(epname)
    ds_of = pfp_io.nc_read_series(ofname)
    dt_ep = ds_ep.series['DateTime']['Data']
    dt_of = ds_of.series['DateTime']['Data']
    # the period common to both files
    # NOTE(review): the time step passed to GetDateIndex is fixed at 30 here
    start_datetime = max(dt_ep[0], dt_of[0])
    end_datetime = min(dt_ep[-1], dt_of[-1])
    si_of = pfp_utils.GetDateIndex(dt_of, str(start_datetime), ts=30,
                                   default=0, match='exact')
    ei_of = pfp_utils.GetDateIndex(dt_of, str(end_datetime), ts=30,
                                   default=len(dt_of), match='exact')
    si_ep = pfp_utils.GetDateIndex(dt_ep, str(start_datetime), ts=30,
                                   default=0, match='exact')
    ei_ep = pfp_utils.GetDateIndex(dt_ep, str(end_datetime), ts=30,
                                   default=len(dt_ep), match='exact')
    # read the four variables from both files
    of_vars = {}
    ep_vars = {}
    for label in ['ustar', 'Fh', 'Fe', 'Fc']:
        of_vars[label] = pfp_utils.GetVariable(ds_of, label, start=si_of, end=ei_of)
        ep_vars[label] = pfp_utils.GetVariable(ds_ep, label, start=si_ep, end=ei_ep)
    # copy the range check values from the OFQC attributes to the EP attributes
    for label in ['ustar', 'Fh', 'Fe', 'Fc']:
        for item in ["rangecheck_upper", "rangecheck_lower"]:
            if item in of_vars[label]["Attr"]:
                ep_vars[label]["Attr"][item] = of_vars[label]["Attr"][item]
    # apply QC to the EddyPro data
    for label in ['ustar', 'Fc', 'Fe', 'Fh']:
        pfp_ck.ApplyRangeCheckToVariable(ep_vars[label])
    # plot the comparison
    plt.ion()
    fig = plt.figure(1, figsize=(8, 8))
    panels = [('ustar', 'u*_EP (m/s)', 'u*_OF (m/s)'),
              ('Fh', 'Fh_EP (W/m2)', 'Fh_OF (W/m2)'),
              ('Fe', 'Fe_EP (W/m2)', 'Fe_OF (W/m2)'),
              ('Fc', 'Fc_EP (umol/m2/s)', 'Fc_OF (umol/m2/s)')]
    for position, (label, x_title, y_title) in enumerate(panels, start=1):
        pfp_plot.xyplot(ep_vars[label]["Data"], of_vars[label]["Data"],
                        sub=[2, 2, position], regr=2,
                        xlabel=x_title, ylabel=y_title)
    plt.tight_layout()
    plt.draw()
    plt.ioff()
def gfMDS_make_data_array(ds, current_year, info):
    """
    Purpose:
     Create a data array for the MDS gap filling routine.  The array
     constructed here will be written to a CSV file that is read by the
     MDS C code.
    Usage:
     data, header, fmt = gfMDS_make_data_array(ds, current_year, info)
     where ds is a data structure
           current_year is the year (integer) to build the array for
           info is a dictionary with the entries "target", "drivers",
                "target_mds" and "drivers_mds"
     Returns the data array, the comma separated header line and the
     comma separated format string for writing the array.
    Side Effects:
     The constructed data arrays are full years.  That is they run from
     one time step after YYYY-01-01 00:00 to YYYY+1-01-01 00:00 (for 30
     minute data the first record is YYYY-01-01 00:30).
     Missing data is represented as -9999.
    Author: PRI
    Date: May 2018
    """
    ldt = pfp_utils.GetVariable(ds, "DateTime")
    nrecs = int(ds.globalattributes["nc_nrecs"])
    ts = int(ds.globalattributes["time_step"])
    step = datetime.timedelta(minutes=ts)
    # the first record of the year is one time step after midnight; deriving
    # it from the time step (rather than assuming 00:30) keeps the generated
    # time stamps aligned with the data for time steps other than 30 minutes
    start = datetime.datetime(current_year, 1, 1, 0, 0, 0) + step
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array(list(pfp_utils.perdelta(start, end, step)))
    mt = numpy.ones(len(cdt)) * float(-9999)
    # one column for the timestamp, one for the target and one per driver
    array_list = [cdt, mt] + [mt] * len(info["drivers"])
    # now we can create the data array
    data = numpy.stack(array_list, axis=-1)
    si = pfp_utils.GetDateIndex(ldt["Data"], start, default=0)
    ei = pfp_utils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = pfp_utils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = pfp_utils.FindMatchingIndices(cdt, dt["Data"])
    pfp_label_list = [info["target"]] + info["drivers"]
    mds_label_list = [info["target_mds"]] + info["drivers_mds"]
    header_fields = ["TIMESTAMP"]
    fmt_fields = ["%12i"]
    for n, label in enumerate(pfp_label_list):
        var = pfp_utils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
        header_fields.append(mds_label_list[n])
        fmt_fields.append("%f")
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data, ",".join(header_fields), ",".join(fmt_fields)
def make_data_array(ds, current_year):
    """
    Purpose:
     Build a full-year array holding a timestamp column followed by the
     columns Fc, VPD, ustar, Ta, Fsd, Fh and Fe (in that order).
    Usage:
     data = make_data_array(ds, current_year)
     where ds is a data structure
           current_year is the year (integer) to build the array for
    Side effects:
     None on ds.  The array runs from one time step after YYYY-01-01 00:00
     to YYYY+1-01-01 00:00.  Records not present in ds are left as -9999.
     Column 0 holds the timestamp as an integer of the form YYYYMMDDhhmm.
    """
    labels = ["Fc", "VPD", "ustar", "Ta", "Fsd", "Fh", "Fe"]
    ldt = pfp_utils.GetVariable(ds, "DateTime")
    # global attributes may be stored as strings; nrecs is used as an index
    nrecs = int(ds.globalattributes["nc_nrecs"])
    ts = int(ds.globalattributes["time_step"])
    step = datetime.timedelta(minutes=ts)
    # the first record of the year is one time step after midnight; deriving
    # it from the time step (rather than assuming 00:30) keeps the generated
    # time stamps aligned with the data for time steps other than 30 minutes
    start = datetime.datetime(current_year, 1, 1, 0, 0, 0) + step
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array(list(pfp_utils.perdelta(start, end, step)))
    mt = numpy.ones(len(cdt)) * float(-9999)
    data = numpy.stack([cdt] + [mt] * len(labels), axis=-1)
    si = pfp_utils.GetDateIndex(ldt["Data"], start, default=0)
    ei = pfp_utils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = pfp_utils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = pfp_utils.FindMatchingIndices(cdt, dt["Data"])
    for n, label in enumerate(labels):
        var = pfp_utils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data
def climatology(cf):
    """
    Purpose:
     Write a climatology workbook for the variables listed in the
     [Variables] section of the control file.
    Usage:
     climatology(cf)
     where cf is a control file object
    Side effects:
     Reads the input file named in the control file and saves an Excel
     workbook whose name is the input name with ".nc" replaced by
     "_Climatology.xls".  For each requested variable found in the data
     structure three sheets are added: the diurnal statistics, the data
     with one row per day ("(day)") and the interpolated daily data
     ("i(day)").  Fa is calculated from Fn and Fg if it is not already in
     the data structure.  Returns None; nothing is done if the input file
     does not exist.
    """
    nc_filename = pfp_io.get_infilenamefromcf(cf)
    if not pfp_utils.file_exists(nc_filename):
        return
    xl_filename = nc_filename.replace(".nc", "_Climatology.xls")
    xlFile = xlwt.Workbook()
    ds = pfp_io.nc_read_series(nc_filename)
    # calculate Fa if it is not in the data structure
    if "Fa" not in ds.series.keys():
        if "Fn" in ds.series.keys() and "Fg" in ds.series.keys():
            pfp_ts.CalculateAvailableEnergy(ds, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
        else:
            logger.warning(" Fn or Fg not in data struicture")
    # get the time step
    ts = int(ds.globalattributes['time_step'])
    # get the datetime series
    dt = ds.series['DateTime']['Data']
    Hdh = numpy.array([(d.hour + d.minute/float(60)) for d in dt])
    Month = numpy.array([d.month for d in dt])
    # get the initial start and end dates
    StartDate = str(dt[0])
    EndDate = str(dt[-1])
    # find the start index of the first whole day (time=00:30)
    si = pfp_utils.GetDateIndex(dt, StartDate, ts=ts, default=0, match='startnextday')
    # find the end index of the last whole day (time=00:00)
    ei = pfp_utils.GetDateIndex(dt, EndDate, ts=ts, default=-1, match='endpreviousday')
    # get local views of the datetime series
    ldt = dt[si:ei+1]
    Hdh = Hdh[si:ei+1]
    Month = Month[si:ei+1]
    # get the number of time steps in a day and the number of days in the data
    ntsInDay = int(24.0*60.0/float(ts))
    # floor division: nDays is used as an array dimension in reshape() and
    # must stay an integer under Python 3
    nDays = len(ldt) // ntsInDay
    for ThisOne in cf['Variables'].keys():
        if "AltVarName" in cf['Variables'][ThisOne].keys():
            ThisOne = cf['Variables'][ThisOne]["AltVarName"]
        if ThisOne not in ds.series.keys():
            logger.warning(" Requested variable "+ThisOne+" not in data structure")
            continue
        logger.info(" Doing climatology for "+ThisOne)
        data, f, a = pfp_utils.GetSeriesasMA(ds, ThisOne, si=si, ei=ei)
        if numpy.ma.count(data) == 0:
            logger.warning(" No data for "+ThisOne+", skipping ...")
            continue
        fmt_str = get_formatstring(cf, ThisOne, fmt_def='')
        xlSheet = xlFile.add_sheet(ThisOne)
        do_diurnalstats(Month, Hdh, data, xlSheet, format_string=fmt_str, ts=ts)
        # now do it for each day
        # we want to preserve any data that has been truncated by the use of
        # the "startnextday" and "endpreviousday" match options used above.
        # Here we revisit the start and end indices and adjust these
        # backwards and forwards respectively if data has been truncated.
        nDays_daily = nDays
        ei_daily = ei
        si_daily = si
        sdate = ldt[0]
        # is there data after the current end date?
        if dt[-1] > ldt[-1]:
            # if so, push the end index forward by 1 day so it is included
            ei_daily = ei + ntsInDay
            nDays_daily = nDays_daily + 1
        # is there data before the current start date?
        if dt[0] < ldt[0]:
            # if so, push the start index back by 1 day so it is included
            si_daily = si - ntsInDay
            nDays_daily = nDays_daily + 1
            sdate = ldt[0] - datetime.timedelta(days=1)
        # get the data and use the "pad" option to add missing data if
        # required to complete the extra days
        data, f, a = pfp_utils.GetSeriesasMA(ds, ThisOne, si=si_daily, ei=ei_daily, mode="pad")
        data_daily = data.reshape(nDays_daily, ntsInDay)
        xlSheet = xlFile.add_sheet(ThisOne+'(day)')
        write_data_1columnpertimestep(xlSheet, data_daily, ts, startdate=sdate, format_string=fmt_str)
        data_daily_i = do_2dinterpolation(data_daily)
        xlSheet = xlFile.add_sheet(ThisOne+'i(day)')
        write_data_1columnpertimestep(xlSheet, data_daily_i, ts, startdate=sdate, format_string=fmt_str)
    logger.info(" Saving Excel file "+os.path.split(xl_filename)[1])
    xlFile.save(xl_filename)
def get_LL_params(ldt, Fsd, D, T, NEE, ER, LT_results, l6_info, output):
    """
    Purpose:
     Fit the light response curve NEE_RHLRC_D to NEE in a sequence of
     windows and return the fitted parameters for each window.
     Lasslop as it was written in Lasslop et al (2010), mostly ...
     Actually, the only intended difference is the window length and offset
     Lasslop et al used window_length=4, window_offset=2
    Usage:
     LL_results = get_LL_params(ldt, Fsd, D, T, NEE, ER, LT_results, l6_info, output)
     where ldt is the datetime series
           Fsd, D, T, NEE and ER are the data series (masked values in
                each window are dropped before fitting)
           LT_results is a dictionary with "mid_date" and "E0_int" entries
           l6_info is a dictionary with an "ERUsingLasslop" entry
           output is the key of the entry in
                l6_info["ERUsingLasslop"]["outputs"] that holds
                "window_size_days" and "step_size_days"
    Returns:
     A dictionary of arrays with one element per window ("start_date",
     "mid_date", "end_date", "alpha", "beta", "k", "rb", "alpha_low",
     "rb_low", "rb_prior", "E0") plus the scalar "D0".  Parameters are NaN
     for windows with fewer than 10 NEE values or with no acceptable fit.
    Notes:
     Windows are window_size_days long and advance by step_size_days.
     Only RuntimeError from curve_fit is treated as "no solution found".
    """
    nan = numpy.nan
    # local pointers to entries in the info dictionary
    iel = l6_info["ERUsingLasslop"]
    ielo = iel["outputs"]
    ieli = iel["info"]
    # window and step sizes
    window_size_days = ielo[output]["window_size_days"]
    step_size_days = ielo[output]["step_size_days"]
    # initialise results and prior dictionaries
    mta = numpy.array([])
    LL_results = {"start_date": mta, "mid_date": mta, "end_date": mta,
                  "alpha": mta, "beta": mta, "k": mta, "rb": mta,
                  "alpha_low": mta, "rb_low": mta, "rb_prior": mta, "E0": mta}
    LL_prior = {"rb": 1.0, "alpha": 0.01, "beta": 10, "k": 0}
    LL_fixed = {"D0": 1}
    D0 = LL_fixed["D0"]
    start_date = ldt[0]
    last_date = ldt[-1]
    end_date = start_date + datetime.timedelta(days=window_size_days)
    while end_date <= last_date:
        sub_results = {"RMSE": [], "alpha": [], "beta": [], "k": [], "rb": []}
        si = pfp_utils.GetDateIndex(ldt, str(start_date), ts=ieli["time_step"])
        ei = pfp_utils.GetDateIndex(ldt, str(end_date), ts=ieli["time_step"])
        Fsdsub = numpy.ma.compressed(Fsd[si:ei + 1])
        Dsub = numpy.ma.compressed(D[si:ei + 1])
        Tsub = numpy.ma.compressed(T[si:ei + 1])
        NEEsub = numpy.ma.compressed(NEE[si:ei + 1])
        ERsub = numpy.ma.compressed(ER[si:ei + 1])
        xdata = [Fsdsub, Dsub, Tsub]
        mid_date = start_date + (end_date - start_date) / 2
        # get the value of E0 for the period closest to the mid-point of this period
        diffs = [abs(dt - mid_date) for dt in LT_results["mid_date"]]
        closest = diffs.index(min(diffs))
        LL_results["E0"] = numpy.append(LL_results["E0"], LT_results["E0_int"][closest])
        LL_results["start_date"] = numpy.append(LL_results["start_date"], start_date)
        LL_results["mid_date"] = numpy.append(LL_results["mid_date"], mid_date)
        LL_results["end_date"] = numpy.append(LL_results["end_date"], end_date)
        # results for this window, NaN unless replaced below
        fit = {"alpha": nan, "beta": nan, "k": nan, "rb": nan,
               "alpha_low": nan, "rb_low": nan}
        if len(NEEsub) >= 10:
            # alpha and rb from linear fit between NEE and Fsd at low light levels
            low_light = numpy.where(Fsdsub < 100)[0]
            if len(low_light) >= 2:
                # the indices refer to the window subset, so the subset (not
                # the full Fsd series) must be indexed here
                alpha_low, rb_low = numpy.polyfit(Fsdsub[low_light], NEEsub[low_light], 1)
            else:
                alpha_low, rb_low = nan, nan
            fit["alpha_low"] = float(-1) * alpha_low
            fit["rb_low"] = rb_low
            if len(ERsub) >= 10:
                LL_prior["rb"] = numpy.mean(ERsub)
            # try three scalings of the beta prior and keep the best fit
            for bm in [0.5, 1, 2]:
                LL_prior["beta"] = numpy.abs(
                    numpy.percentile(NEEsub, 3) - numpy.percentile(NEEsub, 97))
                LL_prior["beta"] = bm * LL_prior["beta"]
                E0 = LL_results["E0"][-1]
                p0 = [LL_prior["alpha"], LL_prior["beta"], LL_prior["k"], LL_prior["rb"]]
                try:
                    fopt = lambda x, alpha, beta, k, rb: NEE_RHLRC_D(
                        x, alpha, beta, k, D0, rb, E0)
                    popt, pcov = curve_fit(fopt, xdata, NEEsub, p0=p0)
                    alpha, beta, k, rb = popt[0], popt[1], popt[2], popt[3]
                    last_alpha_OK = True
                except RuntimeError:
                    alpha, beta, k, rb = nan, nan, nan, nan
                    last_alpha_OK = False
                # QC the parameters
                # k first: refit with k fixed at 0 if it is out of range
                if numpy.isnan(k) or k < 0 or k > 2:
                    k = 0
                    try:
                        p0 = [LL_prior["alpha"], LL_prior["beta"], LL_prior["rb"]]
                        fopt = lambda x, alpha, beta, rb: NEE_RHLRC_D(
                            x, alpha, beta, k, D0, rb, E0)
                        popt, pcov = curve_fit(fopt, xdata, NEEsub, p0=p0)
                        alpha, beta, rb = popt[0], popt[1], popt[2]
                        last_alpha_OK = True
                    except RuntimeError:
                        alpha, beta, k, rb = nan, nan, nan, nan
                        last_alpha_OK = False
                # then alpha: refit with alpha fixed if it is out of range
                if numpy.isnan(alpha) or alpha < 0 or alpha > 0.22:
                    if last_alpha_OK == True and len(LL_results["alpha"]) > 0:
                        alpha = LL_results["alpha"][-1]
                    else:
                        alpha = 0
                    try:
                        p0 = [LL_prior["beta"], LL_prior["k"], LL_prior["rb"]]
                        fopt = lambda x, beta, k, rb: NEE_RHLRC_D(
                            x, alpha, beta, k, D0, rb, E0)
                        popt, pcov = curve_fit(fopt, xdata, NEEsub, p0=p0)
                        beta, k, rb = popt[0], popt[1], popt[2]
                    except RuntimeError:
                        alpha, beta, k, rb = nan, nan, nan, nan
                # then beta: refit with beta fixed at 0 if negative, reject if too large
                if beta < 0:
                    beta = 0
                    try:
                        p0 = [LL_prior["alpha"], LL_prior["k"], LL_prior["rb"]]
                        fopt = lambda x, alpha, k, rb: NEE_RHLRC_D(
                            x, alpha, beta, k, D0, rb, E0)
                        popt, pcov = curve_fit(fopt, xdata, NEEsub, p0=p0)
                        alpha, k, rb = popt[0], popt[1], popt[2]
                    except RuntimeError:
                        alpha, beta, k, rb = nan, nan, nan, nan
                elif beta > 250:
                    alpha, beta, k, rb = nan, nan, nan, nan
                # and finally rb
                if rb < 0:
                    alpha, beta, k, rb = nan, nan, nan, nan
                # now get the RMSE for this set of parameters
                if not (numpy.isnan(alpha) or numpy.isnan(beta)
                        or numpy.isnan(k) or numpy.isnan(rb)):
                    NEEest = NEE_RHLRC_D(xdata, alpha, beta, k, D0, rb, E0)
                    sub_results["RMSE"].append(
                        numpy.sqrt(numpy.mean((NEEsub - NEEest)**2)))
                    sub_results["alpha"].append(alpha)
                    sub_results["beta"].append(beta)
                    sub_results["k"].append(k)
                    sub_results["rb"].append(rb)
            # now find the minimum RMSE and the set of parameters for the minimum
            if len(sub_results["RMSE"]) != 0:
                best = sub_results["RMSE"].index(min(sub_results["RMSE"]))
                for key in ("alpha", "beta", "k", "rb"):
                    fit[key] = sub_results[key][best]
        fit["rb_prior"] = LL_prior["rb"]
        for key, value in fit.items():
            LL_results[key] = numpy.append(LL_results[key], value)
        # update the start and end datetimes: advance the window by the step
        # size and keep its length equal to the window size
        start_date = start_date + datetime.timedelta(days=step_size_days)
        end_date = start_date + datetime.timedelta(days=window_size_days)
    LL_results["D0"] = D0
    return LL_results
def get_LT_params(ldt, ER, T, l6_info, output, mode="verbose"):
    """
    Purpose:
     Returns rb and E0 for the Lloyd & Taylor respiration function.
    Usage:
     LT_results = get_LT_params(ldt, ER, T, l6_info, output, mode="verbose")
     where ldt is the datetime series
           ER and T are the data series (masked values in each window are
                dropped before fitting)
           l6_info is a dictionary with an "ERUsingLasslop" entry
           output is the key of the entry in
                l6_info["ERUsingLasslop"]["outputs"] that holds
                "window_size_days" and "step_size_days"
           mode is "verbose" to log the windows for which no solution
                was found
    Returns:
     A dictionary of arrays with one element per window: "start_date",
     "mid_date", "end_date", "rb", "E0", "rb_prior" and "E0_prior".
     rb and E0 are NaN for windows with fewer than 10 ER values and for
     windows where curve_fit found no solution (RuntimeError).
    Notes:
     Windows are window_size_days long and advance by step_size_days.
     E0 outside 50 to 400 is replaced by the previous window's value (if
     that was accepted) or clamped to the nearest limit, and rb is then
     refitted with E0 held fixed.  Negative rb is set to 0.
    Author: PRI
    Date: April 2016
    """
    nan = numpy.nan
    # local pointers to entries in the info dictionary
    iel = l6_info["ERUsingLasslop"]
    ielo = iel["outputs"]
    ieli = iel["info"]
    # window and step sizes
    window_size_days = ielo[output]["window_size_days"]
    step_size_days = ielo[output]["step_size_days"]
    # initialise results, missed dates and prior dictionaries
    mta = numpy.array([])
    LT_results = {"start_date": mta, "mid_date": mta, "end_date": mta,
                  "rb": mta, "E0": mta, "rb_prior": mta, "E0_prior": mta}
    missed_dates = {"start_date": [], "end_date": []}
    LT_prior = {"rb": 1.0, "E0": 100}
    # get the start and end date
    start_date = ldt[0]
    last_date = ldt[-1]
    end_date = start_date + datetime.timedelta(days=window_size_days)
    last_E0_OK = False
    while end_date <= last_date:
        LT_results["start_date"] = numpy.append(LT_results["start_date"], start_date)
        LT_results["mid_date"] = numpy.append(
            LT_results["mid_date"], start_date + (end_date - start_date) / 2)
        LT_results["end_date"] = numpy.append(LT_results["end_date"], end_date)
        si = pfp_utils.GetDateIndex(ldt, str(start_date), ts=ieli["time_step"])
        ei = pfp_utils.GetDateIndex(ldt, str(end_date), ts=ieli["time_step"])
        Tsub = numpy.ma.compressed(T[si:ei + 1])
        ERsub = numpy.ma.compressed(ER[si:ei + 1])
        # results for this window, NaN unless replaced below
        rb, E0, rb_prior, E0_prior = nan, nan, nan, nan
        if len(ERsub) >= 10:
            LT_prior["rb"] = numpy.mean(ERsub)
            rb_prior = LT_prior["rb"]
            E0_prior = LT_prior["E0"]
            p0 = [LT_prior["rb"], LT_prior["E0"]]
            try:
                popt, _ = curve_fit(ER_LloydTaylor, Tsub, ERsub, p0=p0)
            except RuntimeError:
                # no solution: record the window and leave rb and E0 as NaN
                # instead of using an undefined or stale popt
                popt = None
                missed_dates["start_date"].append(start_date)
                missed_dates["end_date"].append(end_date)
                last_E0_OK = False
            if popt is not None:
                rb, E0 = popt[0], popt[1]
                # QC E0 results
                if E0 < 50 or E0 > 400:
                    if last_E0_OK:
                        E0 = LT_results["E0"][-1]
                    elif E0 < 50:
                        E0 = float(50)
                    else:
                        E0 = float(400)
                    last_E0_OK = False
                    # the previous value can be NaN if that window had too few data
                    if numpy.isnan(E0):
                        E0 = float(50)
                    # now recalculate rb with E0 held fixed
                    E0_fixed = numpy.ones(len(Tsub)) * float(E0)
                    try:
                        popt1, _ = curve_fit(ER_LloydTaylor_fixedE0, [Tsub, E0_fixed],
                                             ERsub, p0=LT_prior["rb"])
                        rb = popt1[0]
                    except RuntimeError:
                        rb, E0 = nan, nan
                        missed_dates["start_date"].append(start_date)
                        missed_dates["end_date"].append(end_date)
                else:
                    last_E0_OK = True
                # QC rb results
                if rb < 0:
                    rb = float(0)
        LT_results["rb"] = numpy.append(LT_results["rb"], rb)
        LT_results["E0"] = numpy.append(LT_results["E0"], E0)
        LT_results["rb_prior"] = numpy.append(LT_results["rb_prior"], rb_prior)
        LT_results["E0_prior"] = numpy.append(LT_results["E0_prior"], E0_prior)
        # advance the window by the step size and keep its length equal to
        # the window size
        start_date = start_date + datetime.timedelta(days=step_size_days)
        end_date = start_date + datetime.timedelta(days=window_size_days)
    if mode == "verbose":
        if len(missed_dates["start_date"]) != 0:
            msg = " No solution found for the following dates:"
            logger.warning(msg)
            for sd, ed in zip(missed_dates["start_date"], missed_dates["end_date"]):
                msg = " " + str(sd) + " to " + str(ed)
                logger.warning(msg)
    return LT_results
def _write_ratio_climatology(cf, xlFile, label, numerator, denominator, Hdh, Month,
                             ts, nDays, ntsInDay, startdate, format_string):
    """Write the monthly and daily climatology sheets for numerator/denominator.

    Four sheets are added to xlFile: label, label+"i" (interpolated),
    label+"(day)" and label+"i(day)".  Values outside the range check
    limits for label are replaced with the missing value.
    """
    # monthly diurnal averages of the ratio
    # NOTE(review): the 48 rows assume a 30 minute time step - confirm
    # against get_diurnalstats before using other time steps
    ratio = numpy.ma.zeros([48, 12]) + float(c.missing_value)
    for m in range(1, 13):
        mi = numpy.where(Month == m)[0]
        num_Num, Hr, num_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], numerator[mi], ts)
        den_Num, Hr, den_Av, Sd, Mx, Mn = get_diurnalstats(Hdh[mi], denominator[mi], ts)
        # only use time steps with more than 4 values of both series
        index = numpy.ma.where((num_Num > 4) & (den_Num > 4))
        ratio[:, m - 1][index] = num_Av[index] / den_Av[index]
    # reject values greater than upper limit or less than lower limit
    upr, lwr = get_rangecheck_limit(cf, label)
    ratio = numpy.ma.filled(
        numpy.ma.masked_where((ratio > upr) | (ratio < lwr), ratio),
        float(c.missing_value))
    # write the ratio to the Excel file
    xlSheet = xlFile.add_sheet(label)
    write_data_1columnpermonth(xlSheet, ratio, ts, format_string=format_string)
    # do the 2D interpolation to fill missing values
    ratio_i = do_2dinterpolation(ratio)
    xlSheet = xlFile.add_sheet(label + 'i')
    write_data_1columnpermonth(xlSheet, ratio_i, ts, format_string=format_string)
    # now do the ratio for each day
    ratio = numerator / denominator
    ratio = numpy.ma.filled(
        numpy.ma.masked_where((ratio > upr) | (ratio < lwr), ratio),
        float(c.missing_value))
    ratio_daily = ratio.reshape(nDays, ntsInDay)
    xlSheet = xlFile.add_sheet(label + '(day)')
    write_data_1columnpertimestep(xlSheet, ratio_daily, ts, startdate=startdate,
                                  format_string=format_string)
    ratio_i = do_2dinterpolation(ratio_daily)
    xlSheet = xlFile.add_sheet(label + 'i(day)')
    write_data_1columnpertimestep(xlSheet, ratio_i, ts, startdate=startdate,
                                  format_string=format_string)


def climatology(cf):
    """
    Purpose:
     Write a climatology workbook for the variables listed in the
     [Variables] section of the control file.
    Usage:
     climatology(cf)
     where cf is a control file object
    Side effects:
     Reads the input file named in the control file and saves an Excel
     workbook whose name is the input name with ".nc" replaced by
     "_Climatology.xls".
     For each requested variable found in the data structure three sheets
     are added: the diurnal statistics, the data with one row per day
     ("(day)") and the interpolated daily data ("i(day)").
     The derived quantities EF (Fe/Fa), BR (Fh/Fe) and WUE (Fc/Fe) are
     calculated when requested and not in the data structure; each adds
     monthly and daily sheets, raw and interpolated.
     Fa is calculated from Fn and Fg if it is not already in the data
     structure.  Returns None; nothing is done if the input file does not
     exist.
    """
    nc_filename = pfp_io.get_infilenamefromcf(cf)
    if not pfp_utils.file_exists(nc_filename):
        return
    xl_filename = nc_filename.replace(".nc", "_Climatology.xls")
    xlFile = xlwt.Workbook()
    ds = pfp_io.nc_read_series(nc_filename)
    # calculate Fa if it is not in the data structure
    got_Fa = True
    if "Fa" not in ds.series.keys():
        if "Fn" in ds.series.keys() and "Fg" in ds.series.keys():
            pfp_ts.CalculateAvailableEnergy(ds, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
        else:
            got_Fa = False
            logger.warning(" Fn or Fg not in data struicture")
    # get the time step
    ts = int(ds.globalattributes['time_step'])
    # get the datetime series
    dt = ds.series['DateTime']['Data']
    Hdh = numpy.array([(d.hour + d.minute / float(60)) for d in dt])
    Month = numpy.array([d.month for d in dt])
    # get the initial start and end dates
    StartDate = str(dt[0])
    EndDate = str(dt[-1])
    # find the start index of the first whole day (time=00:30)
    si = pfp_utils.GetDateIndex(dt, StartDate, ts=ts, default=0, match='startnextday')
    # find the end index of the last whole day (time=00:00)
    ei = pfp_utils.GetDateIndex(dt, EndDate, ts=ts, default=-1, match='endpreviousday')
    # get local views of the datetime series
    ldt = dt[si:ei + 1]
    Hdh = Hdh[si:ei + 1]
    Month = Month[si:ei + 1]
    # get the number of time steps in a day and the number of days in the data
    ntsInDay = int(24.0 * 60.0 / float(ts))
    # floor division: nDays is used as an array dimension in reshape() and
    # must stay an integer under Python 3
    nDays = len(ldt) // ntsInDay
    for ThisOne in cf['Variables'].keys():
        if "AltVarName" in cf['Variables'][ThisOne].keys():
            ThisOne = cf['Variables'][ThisOne]["AltVarName"]
        if ThisOne in ds.series.keys():
            logger.info(" Doing climatology for " + ThisOne)
            data, f, a = pfp_utils.GetSeriesasMA(ds, ThisOne, si=si, ei=ei)
            if numpy.ma.count(data) == 0:
                logger.warning(" No data for " + ThisOne + ", skipping ...")
                continue
            fmt_str = get_formatstring(cf, ThisOne, fmt_def='')
            xlSheet = xlFile.add_sheet(ThisOne)
            do_diurnalstats(Month, Hdh, data, xlSheet, format_string=fmt_str, ts=ts)
            # now do it for each day
            # we want to preserve any data that has been truncated by the use
            # of the "startnextday" and "endpreviousday" match options used
            # above.  Here we revisit the start and end indices and adjust
            # these backwards and forwards respectively if data has been
            # truncated.
            nDays_daily = nDays
            ei_daily = ei
            si_daily = si
            sdate = ldt[0]
            # is there data after the current end date?
            if dt[-1] > ldt[-1]:
                # if so, push the end index forward by 1 day so it is included
                ei_daily = ei + ntsInDay
                nDays_daily = nDays_daily + 1
            # is there data before the current start date?
            if dt[0] < ldt[0]:
                # if so, push the start index back by 1 day so it is included
                si_daily = si - ntsInDay
                nDays_daily = nDays_daily + 1
                sdate = ldt[0] - datetime.timedelta(days=1)
            # get the data and use the "pad" option to add missing data if
            # required to complete the extra days
            data, f, a = pfp_utils.GetSeriesasMA(ds, ThisOne, si=si_daily,
                                                 ei=ei_daily, mode="pad")
            data_daily = data.reshape(nDays_daily, ntsInDay)
            xlSheet = xlFile.add_sheet(ThisOne + '(day)')
            write_data_1columnpertimestep(xlSheet, data_daily, ts, startdate=sdate,
                                          format_string=fmt_str)
            data_daily_i = do_2dinterpolation(data_daily)
            xlSheet = xlFile.add_sheet(ThisOne + 'i(day)')
            write_data_1columnpertimestep(xlSheet, data_daily_i, ts, startdate=sdate,
                                          format_string=fmt_str)
        elif ThisOne == "EF" and got_Fa:
            logger.info(" Doing evaporative fraction")
            # NOTE(review): this replaces the Hdh computed from the datetimes
            # with the series read from ds for the rest of the loop, as the
            # original code did - confirm this is intended
            Hdh, f, a = pfp_utils.GetSeriesasMA(ds, 'Hdh', si=si, ei=ei)
            Fa, f, a = pfp_utils.GetSeriesasMA(ds, 'Fa', si=si, ei=ei)
            Fe, f, a = pfp_utils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            _write_ratio_climatology(cf, xlFile, 'EF', Fe, Fa, Hdh, Month, ts,
                                     nDays, ntsInDay, ldt[0], '0.00')
        elif ThisOne == "BR":
            logger.info(" Doing Bowen ratio")
            Fe, f, a = pfp_utils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            Fh, f, a = pfp_utils.GetSeriesasMA(ds, 'Fh', si=si, ei=ei)
            _write_ratio_climatology(cf, xlFile, 'BR', Fh, Fe, Hdh, Month, ts,
                                     nDays, ntsInDay, ldt[0], '0.00')
        elif ThisOne == "WUE":
            logger.info(" Doing ecosystem WUE")
            Fe, f, a = pfp_utils.GetSeriesasMA(ds, 'Fe', si=si, ei=ei)
            Fc, f, a = pfp_utils.GetSeriesasMA(ds, 'Fc', si=si, ei=ei)
            _write_ratio_climatology(cf, xlFile, 'WUE', Fc, Fe, Hdh, Month, ts,
                                     nDays, ntsInDay, ldt[0], '0.00000')
        else:
            logger.warning(" Requested variable " + ThisOne + " not in data structure")
            continue
    logger.info(" Saving Excel file " + os.path.split(xl_filename)[1])
    xlFile.save(xl_filename)