def ImportSeries(cf, ds):
    """
    Purpose:
     Import series from other netCDF files into the data structure.
     Each entry in the [Imports] section of the control file supplies a
     "file_name" and a "var_name".  The variable is read from that file
     between the first and last datetimes of ds, copied into arrays of
     length nc_nrecs at the indices returned by
     pfp_utils.FindIndicesOfBInA and created in ds under the label of
     the control file entry.  Elements that are not overwritten keep
     c.missing_value as data and 1 as flag.
     Nothing is done if the control file has no [Imports] section and
     entries without a file name or a variable name are skipped with a
     warning.
    Usage:
     ImportSeries(cf, ds)
     where cf is a control file
           ds is a data structure
    """
    # nothing to do without an Imports section
    if "Imports" not in cf.keys():
        return
    n_records = int(ds.globalattributes["nc_nrecs"])
    # period covered by the data structure
    tower_dt = ds.series["DateTime"]["Data"]
    first_dt, last_dt = tower_dt[0], tower_dt[-1]
    for label in cf["Imports"].keys():
        file_name = pfp_utils.get_keyvaluefromcf(cf, ["Imports", label],
                                                 "file_name", default="")
        if file_name == "":
            logger.warning(" ImportSeries: import filename not found in control file, skipping ...")
            continue
        var_name = pfp_utils.get_keyvaluefromcf(cf, ["Imports", label],
                                                "var_name", default="")
        if var_name == "":
            logger.warning(" ImportSeries: variable name not found in control file, skipping ...")
            continue
        # read the import file and locate the period of ds within it
        ds_import = pfp_io.nc_read_series(file_name)
        import_ts = ds_import.globalattributes["time_step"]
        import_dt = ds_import.series["DateTime"]["Data"]
        si = pfp_utils.GetDateIndex(import_dt, str(first_dt), ts=import_ts,
                                    default=0, match="exact")
        ei = pfp_utils.GetDateIndex(import_dt, str(last_dt), ts=import_ts,
                                    default=len(import_dt) - 1, match="exact")
        # start from arrays of missing data with a flag of 1
        data = numpy.ma.ones(n_records) * float(c.missing_value)
        flag = numpy.ma.ones(n_records)
        imported_data, imported_flag, imported_attr = pfp_utils.GetSeriesasMA(
            ds_import, var_name, si=si, ei=ei)
        # drop the imported values into place
        index = pfp_utils.FindIndicesOfBInA(import_dt[si:ei + 1], tower_dt)
        data[index] = imported_data
        flag[index] = imported_flag
        pfp_utils.CreateSeries(ds, label, data, flag, imported_attr)
def MRfromRH(ds, MR_out, RH_in, Ta_in, ps_in):
    """
    Purpose:
     Calculate H2O mixing ratio from RH.
     The relative humidity and air temperature series are converted to
     absolute humidity with pfp_mf.absolutehumidityfromRH and the result
     is converted with pfp_mf.h2o_mmolpmolfromgpm3 using the pressure
     series.  The new series is created in the data structure with units
     of "mmol/mol", the height attribute of the RH series and a flag of
     1 where the result is masked, 0 elsewhere.
     Nothing is calculated if any input series is missing or if the
     output series already exists.
    Usage:
     MRfromRH(ds, MR_out, RH_in, Ta_in, ps_in)
     where ds is a data structure
           MR_out is the label of the output series
           RH_in, Ta_in, ps_in are the labels of the input series
     Returns 1 if the series was created, 0 otherwise.
    """
    nRecs = int(ds.globalattributes["nc_nrecs"])
    flag_bad = numpy.ones(nRecs, dtype=numpy.int32)
    flag_good = numpy.zeros(nRecs, dtype=numpy.int32)
    # all inputs must be present
    for item in (RH_in, Ta_in, ps_in):
        if item in ds.series.keys():
            continue
        logger.error("".join([" MRfromRH: Requested series ", item,
                              " not found, ", MR_out, " not calculated"]))
        return 0
    # never overwrite an existing series
    if MR_out in ds.series.keys():
        logger.error("".join([" MRfromRH: Output series ", MR_out,
                              " already exists, skipping ..."]))
        return 0
    RH_data, _, RH_attr = pfp_utils.GetSeriesasMA(ds, RH_in)
    Ta_data, _, _ = pfp_utils.GetSeriesasMA(ds, Ta_in)
    Ah_data = pfp_mf.absolutehumidityfromRH(Ta_data, RH_data)
    ps_data, _, _ = pfp_utils.GetSeriesasMA(ds, ps_in)
    MR_data = pfp_mf.h2o_mmolpmolfromgpm3(Ah_data, Ta_data, ps_data)
    long_name = "".join(["H2O mixing ratio calculated from ", RH_in, ", ",
                         Ta_in, " and ", ps_in])
    MR_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name,
                                                height=RH_attr["height"],
                                                units="mmol/mol")
    # flag is 1 wherever the calculated value is masked
    flag = numpy.where(numpy.ma.getmaskarray(MR_data), flag_bad, flag_good)
    pfp_utils.CreateSeries(ds, MR_out, MR_data, flag, MR_attr)
    return 1
def AhfromRH(ds, Ah_out, RH_in, Ta_in):
    """
    Purpose:
     Function to calculate absolute humidity given relative humidity and
     air temperature.  Absolute humidity is not calculated if any of the
     input series are missing or if the specified output series already
     exists in the data structure.
     The calculated absolute humidity is created as a new series in the
     data structure with units of "g/m3", the height attribute of the RH
     series and a flag of 1 where the result is masked, 0 elsewhere.
    Usage:
     pfp_func.AhfromRH(ds,"Ah_HMP_2m","RH_HMP_2m","Ta_HMP_2m")
     Returns 1 if the series was created, 0 otherwise.
    Author: PRI
    Date: September 2015
    """
    nRecs = int(ds.globalattributes["nc_nrecs"])
    flag_bad = numpy.ones(nRecs, dtype=numpy.int32)
    flag_good = numpy.zeros(nRecs, dtype=numpy.int32)
    # all inputs must be present
    for item in (RH_in, Ta_in):
        if item in ds.series.keys():
            continue
        logger.error("".join([" AhfromRH: Requested series ", item,
                              " not found, ", Ah_out, " not calculated"]))
        return 0
    # never overwrite an existing series
    if Ah_out in ds.series.keys():
        logger.error("".join([" AhfromRH: Output series ", Ah_out,
                              " already exists, skipping ..."]))
        return 0
    RH_data, _, RH_attr = pfp_utils.GetSeriesasMA(ds, RH_in)
    Ta_data, _, _ = pfp_utils.GetSeriesasMA(ds, Ta_in)
    Ah_data = pfp_mf.absolutehumidityfromRH(Ta_data, RH_data)
    long_name = "".join(["Absolute humidity calculated from ", RH_in,
                         " and ", Ta_in])
    Ah_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name,
                                                height=RH_attr["height"],
                                                units="g/m3")
    # flag is 1 wherever the calculated value is masked
    flag = numpy.where(numpy.ma.getmaskarray(Ah_data), flag_bad, flag_good)
    pfp_utils.CreateSeries(ds, Ah_out, Ah_data, flag, Ah_attr)
    return 1
def gfClimatology_interpolateddaily(ds, series, output, xlbooks):
    """
    Gap fill using data interpolated over a 2D array where the days are
    the rows and the time of day is the columns.

    The values are read from the worksheet called series+'i(day)' in the
    workbook named by ds.climatology[output]["file_name"].  Row 1 of the
    sheet holds the time of day (hours) of each column and rows 2 onward
    hold an Excel date number in column 0 followed by the values for that
    day.  Every masked element of the series is replaced by the
    climatological value found with pfp_utils.find_nearest_value and
    given a flag of 40; if that lookup raises ValueError the element is
    set to c.missing_value with a flag of 41.  The result is created in
    ds under the label given by output.
    """
    xlfilename = ds.climatology[output]["file_name"]
    book = xlbooks[xlfilename]
    sheet_name = series + 'i(day)'
    if sheet_name not in book.sheet_names():
        logger.warning(" gfClimatology: sheet " + sheet_name + " not found, skipping ...")
        return
    ldt = ds.series["DateTime"]["Data"]
    sheet = book.sheet_by_name(sheet_name)
    datemode = book.datemode
    basedate = datetime.datetime(1899, 12, 30)
    # first column is the date, first two rows are headers
    nts = sheet.ncols - 1
    ndays = sheet.nrows - 2
    # time of day of each column
    tsteps = sheet.row_values(1, start_colx=1, end_colx=nts + 1)
    # flattened climatological values and their datetimes
    val1d = numpy.ma.zeros(ndays * nts, dtype=numpy.float64)
    cdt = [None] * nts * ndays
    for day in range(ndays):
        sheet_row = day + 2
        lo, hi = day * nts, (day + 1) * nts
        # Excel date number of this row as a Python datetime
        xldatenumber = int(sheet.cell_value(sheet_row, 0))
        day_start = basedate + datetime.timedelta(days=xldatenumber + 1462 * datemode)
        cdt[lo:hi] = [day_start + datetime.timedelta(hours=hh) for hh in tsteps]
        val1d[lo:hi] = sheet.row_values(sheet_row, start_colx=1, end_colx=nts + 1)
    # the series to be filled and the location of its gaps
    data, flag, attr = pfp_utils.GetSeriesasMA(ds, series)
    gaps = numpy.where(numpy.ma.getmaskarray(data))[0]
    # The gaps are filled one element at a time, matching the datetime of
    # each missing value to a datetime in the climatology.  Assigning the
    # whole index at once was reported not to clear the mask on the
    # replaced values; this may not be the fastest way but it ties every
    # filled value to a date in the climatology file.
    for ii in gaps:
        try:
            jj = pfp_utils.find_nearest_value(cdt, ldt[ii])
            data[ii] = val1d[jj]
            flag[ii] = numpy.int32(40)
        except ValueError:
            data[ii] = numpy.float64(c.missing_value)
            flag[ii] = numpy.int32(41)
    # put the gap filled data back into the data structure
    pfp_utils.CreateSeries(ds, output, data, flag, attr)
def do_lowercheck(cf, ds, section, series, code=2):
    """
    Purpose:
     Mask data that fall below a lower limit which varies linearly with
     time.  Each entry in the LowerCheck section for the series is read
     as [start date, start limit, end date, end limit].  Between the two
     dates the limit is interpolated linearly against record index and
     any value below it is masked and has its QC flag set to code.
     The text of each entry is stored in the variable attributes as
     lowercheck_<n>.
     Nothing is done if LowerCheck is not requested for the series or if
     no date ranges are given.
    Usage:
     do_lowercheck(cf, ds, section, series, code=2)
     where cf is a control file
           ds is a data structure
           section is the control file section containing the series
           series is the label of the series to check
           code is the QC flag value given to rejected data
    Author: PRI
    Date: February 2017
    """
    # has a lower check been requested for this variable?
    if "LowerCheck" not in cf[section][series]:
        return
    # have any limits been specified?
    if len(cf[section][series]["LowerCheck"].keys()) == 0:
        logger.info("do_lowercheck: no date ranges specified")
        return
    ldt = ds.series["DateTime"]["Data"]
    ts = ds.globalattributes["time_step"]
    data, flag, attr = pfp_utils.GetSeriesasMA(ds, series)
    entry_names = list(cf[section][series]["LowerCheck"].keys())
    for n, name in enumerate(entry_names):
        # expected to be a list; compliance is not checked
        entry = cf[section][series]["LowerCheck"][name]
        attr["lowercheck_%d" % n] = str(entry)
        start_date = dateutil.parser.parse(entry[0])
        start_limit = float(entry[1])
        end_date = dateutil.parser.parse(entry[2])
        end_limit = float(entry[3])
        # indices bounding this date range
        si = pfp_utils.GetDateIndex(ldt, start_date, ts=ts, default=0, match="exact")
        ei = pfp_utils.GetDateIndex(ldt, end_date, ts=ts, default=len(ldt) - 1, match="exact")
        window = slice(si, ei + 1)
        seg_data = data[window]
        seg_flag = flag[window]
        # lower limit for every record in the range
        records = numpy.arange(si, ei + 1, 1)
        lower = numpy.interp(records, [si, ei], [start_limit, end_limit])
        rejected = numpy.ma.where(seg_data < lower)[0]
        seg_data[rejected] = numpy.ma.masked
        seg_flag[rejected] = numpy.int32(code)
        # write the segment back into the full series
        data[window] = seg_data
        flag[window] = seg_flag
    # now put the data back into the data structure
    pfp_utils.CreateSeries(ds, series, data, Flag=flag, Attr=attr)
    return
def gfSOLO_createdict(cf, ds, series):
    """
    Creates a dictionary in ds to hold information about the SOLO data
    used to gap fill the tower data.

    One entry is added to ds.solo for every output listed under
    GapFillUsingSOLO for the series.  Each entry holds the target label,
    the site name, the SOLO settings (when given), the list of drivers,
    the turbulence and day/night filter options and empty lists for the
    fit statistics.  An empty series is created in ds for any output that
    does not exist yet.  Nothing is done if the series is not found in
    the control file.
    """
    # find the control file section that holds this series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        logger.error("GapFillUsingSOLO: " + series + " not found in control file, skipping ...")
        return
    # make sure the SOLO dictionary exists
    if "solo" not in dir(ds):
        ds.solo = {}
    # how each element of the solo_settings list is converted, in order
    setting_converters = (("nodes_target", int), ("training", int),
                          ("factor", int), ("learningrate", float),
                          ("iterations", int))
    # names of the statistics saved for plotting later on
    result_names = ("startdate", "enddate", "No. points", "r",
                    "Bias", "RMSE", "Frac Bias", "NMSE",
                    "Avg (obs)", "Avg (SOLO)",
                    "Var (obs)", "Var (SOLO)", "Var ratio",
                    "m_ols", "b_ols")
    for output in cf[section][series]["GapFillUsingSOLO"].keys():
        cf_output = cf[section][series]["GapFillUsingSOLO"][output]
        info = ds.solo[output] = {}
        # the target defaults to the series itself
        info["label_tower"] = cf_output["target"] if "target" in cf_output else series
        info["site_name"] = ds.globalattributes["site_name"]
        # optional SOLO settings
        if "solo_settings" in cf_output:
            ss_list = ast.literal_eval(cf_output["solo_settings"])
            settings = info["solo_settings"] = {}
            for position, (key, convert) in enumerate(setting_converters):
                settings[key] = convert(ss_list[position])
        # drivers
        info["drivers"] = ast.literal_eval(cf_output["drivers"])
        # filter options, empty string when not given
        for option in ("turbulence_filter", "daynight_filter"):
            info[option] = pfp_utils.get_keyvaluefromcf(
                cf, [section, series, "GapFillUsingSOLO", output], option, default="")
        # empty containers for the results of the best fit
        info["results"] = {name: [] for name in result_names}
        # create the output series if it is not there yet
        if output not in ds.series.keys():
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
def gfClimatology_createdict(cf, ds, series):
    """
    Creates a dictionary in ds to hold information about the climatological
    data used to gap fill the tower data.

    One entry is added to ds.climatology for every output listed under
    GapFillFromClimatology for the series.  Each entry holds:
      label_tower      - the series label
      site_name        - the site name from the global attributes
      file_name        - the climatology file, taken from the "climatology"
                         key (any case) of the [Files] section when present,
                         otherwise from the "file_name" key of the output
      climatology_name - the "climatology_name" key of the output, defaulting
                         to the series label
      method           - the "method" key of the output, defaulting to
                         "interpolated_daily"
    An empty series is created in ds for any output that does not exist yet.
    Nothing is done if the series is not found in the control file.
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        msg = "GapFillFromClimatology: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # create the climatology directory in the data structure
    if "climatology" not in dir(ds):
        ds.climatology = {}
    # name of alternate output series in ds
    output_list = cf[section][series]["GapFillFromClimatology"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        cf_output = cf[section][series]["GapFillFromClimatology"][output]
        # create the dictionary keys for this output
        ds.climatology[output] = {}
        info = ds.climatology[output]
        info["label_tower"] = series
        # site name
        info["site_name"] = ds.globalattributes["site_name"]
        # Climatology file name
        # materialise the keys as a list: they are indexed by position below
        # and a keys() view (plain dict on Python 3) does not support that
        file_list = list(cf["Files"].keys())
        lower_file_list = [item.lower() for item in file_list]
        # first, look in the [Files] section for a generic file name
        if "climatology" in lower_file_list:
            # found a generic file name
            i = lower_file_list.index("climatology")
            info["file_name"] = cf["Files"][file_list[i]]
        else:
            # no generic file name found, look for a file name in the variable section
            info["file_name"] = cf_output["file_name"]
        # climatology variable name if different from name used in control file
        if "climatology_name" in cf_output:
            info["climatology_name"] = cf_output["climatology_name"]
        else:
            info["climatology_name"] = series
        # climatology gap filling method
        if "method" not in cf_output.keys():
            # default if "method" missing is "interpolated_daily"
            info["method"] = "interpolated_daily"
        else:
            info["method"] = cf_output["method"]
        # create an empty series in ds if the climatology output series doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
def do_rangecheck(cf, ds, section, series, code=2):
    """
    Purpose:
     Applies a range check to data series listed in the control file.  Data
     values that are less than the lower limit or greater than the upper limit
     are replaced with c.missing_value and the corresponding QC flag element is
     set to the value of code (default 2).
     The limits are read from the "Lower" and "Upper" keys of the RangeCheck
     section for the series.  Each is evaluated to a sequence that is indexed
     with the "Month" series minus 1, so a limit is expected for each month.
     The limits as written in the control file are saved in the variable
     attributes "rangecheck_lower" and "rangecheck_upper" and the smallest
     lower limit and the largest upper limit are saved in "valid_range".
     Nothing is done if RangeCheck is not requested for the series; a warning
     is logged and nothing is done if either limit is missing.
    Usage:
     do_rangecheck(cf, ds, section, series, code=2)
     where cf is a control file
           ds is a data structure
           section is the control file section containing the series
           series is the label of the series to check
           code is the QC flag value given to rejected data
    Author: PRI
    Date: Back in the day
    """
    # check that RangeCheck has been requested for this series
    if 'RangeCheck' not in cf[section][series].keys():
        return
    range_check = cf[section][series]["RangeCheck"]
    # check that the upper and lower limits have been given
    if ("Lower" not in range_check.keys() or "Upper" not in range_check.keys()):
        msg = "RangeCheck: key not found in control file for " + series + ", skipping ..."
        logger.warning(msg)
        return
    # index of the monthly limit that applies to each record
    month_index = ds.series['Month']['Data'] - 1
    # get the upper and lower limits
    # NOTE(review): eval() executes whatever text is in the control file.  It
    # is kept because limits may be written as expressions, but control files
    # must come from a trusted source.
    upr = numpy.array(eval(range_check['Upper']))
    # valid_range spans every month: its upper bound is the LARGEST upper limit
    valid_upper = numpy.max(upr)
    upr = upr[month_index]
    lwr = numpy.array(eval(range_check['Lower']))
    valid_lower = numpy.min(lwr)
    lwr = lwr[month_index]
    # get the data, flag and attributes
    data, flag, attr = pfp_utils.GetSeriesasMA(ds, series)
    # convert the data from a masked array to an ndarray so the range check works
    # NOTE: masked elements become c.missing_value here, so they are compared
    # with the limits like any other value and flagged if they fall outside
    data = numpy.ma.filled(data, fill_value=c.missing_value)
    # get the indices of elements outside this range
    idx = numpy.where((data < lwr) | (data > upr))[0]
    # set elements outside range to missing and set the QC flag
    data[idx] = numpy.float64(c.missing_value)
    flag[idx] = numpy.int32(code)
    # update the variable attributes
    attr["rangecheck_lower"] = range_check["Lower"]
    attr["rangecheck_upper"] = range_check["Upper"]
    attr["valid_range"] = str(valid_lower) + "," + str(valid_upper)
    # and now put the data back into the data structure
    pfp_utils.CreateSeries(ds, series, data, Flag=flag, Attr=attr)
    # now we can return
    return
def AhfromMR(ds, Ah_out, MR_in, Ta_in, ps_in):
    """
    Purpose:
     Function to calculate absolute humidity given the water vapour mixing
     ratio, air temperature and pressure.  Absolute humidity is not
     calculated if any of the input series are missing or if the specified
     output series already exists in the data structure.
     The calculated absolute humidity is created as a new series in the
     data structure with units of "g/m3", the height attribute of the
     mixing ratio series and a flag of 1 where the result is masked,
     0 elsewhere.
    Usage:
     pfp_func.AhfromMR(ds,"Ah_IRGA_Av","H2O_IRGA_Av","Ta_HMP_2m","ps")
     Returns 1 if the series was created, 0 otherwise.
    Author: PRI
    Date: September 2015
    """
    nRecs = int(ds.globalattributes["nc_nrecs"])
    flag_bad = numpy.ones(nRecs, dtype=numpy.int32)
    flag_good = numpy.zeros(nRecs, dtype=numpy.int32)
    # all inputs must be present
    for item in (MR_in, Ta_in, ps_in):
        if item in ds.series.keys():
            continue
        logger.error("".join([" AhfromMR: Requested series ", item,
                              " not found, ", Ah_out, " not calculated"]))
        return 0
    # never overwrite an existing series
    if Ah_out in ds.series.keys():
        logger.error("".join([" AhfromMR: Output series ", Ah_out,
                              " already exists, skipping ..."]))
        return 0
    MR_data, _, MR_attr = pfp_utils.GetSeriesasMA(ds, MR_in)
    Ta_data, _, _ = pfp_utils.GetSeriesasMA(ds, Ta_in)
    ps_data, _, _ = pfp_utils.GetSeriesasMA(ds, ps_in)
    Ah_data = pfp_mf.h2o_gpm3frommmolpmol(MR_data, Ta_data, ps_data)
    long_name = "".join(["Absolute humidity calculated from ", MR_in, ", ",
                         Ta_in, " and ", ps_in])
    Ah_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name,
                                                height=MR_attr["height"],
                                                units="g/m3")
    # flag is 1 wherever the calculated value is masked
    flag = numpy.where(numpy.ma.getmaskarray(Ah_data), flag_bad, flag_good)
    pfp_utils.CreateSeries(ds, Ah_out, Ah_data, flag, Ah_attr)
    return 1
def gfMergeSeries_createdict(cf, ds, series):
    """
    Creates a dictionary in ds to hold information about the merging of gap
    filled and tower data.

    The entry for the series is stored in ds.merge["prerequisite"] when the
    series is one of Fsd, Fsu, Fld, Flu, Ts or Sws and in ds.merge["standard"]
    otherwise.  It holds the output label (the series label) and the list of
    sources read from the "Source" key of the MergeSeries section.  An empty
    series is created in ds if the output series does not exist yet.
    An error is logged and nothing is added if the series is not found in
    the control file.
    """
    merge_prereq_list = ["Fsd", "Fsu", "Fld", "Flu", "Ts", "Sws"]
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # create the merge directory in the data structure
    if "merge" not in dir(ds):
        ds.merge = {}
    # return without doing anything if the series isn't in a control file
    # section, as the other *_createdict functions do, rather than failing
    # with a KeyError on the empty section name
    if len(section) == 0:
        msg = "MergeSeries: " + series + " not found in control file, skipping ..."
        logger.error(msg)
        return
    # check to see if this series is in the "merge first" list
    # series in the "merge first" list get merged first so they can be used with existing tower
    # data to re-calculate Fg, Fn and Fa
    merge_order = "standard"
    if series in merge_prereq_list:
        merge_order = "prerequisite"
    if merge_order not in ds.merge.keys():
        ds.merge[merge_order] = {}
    # create the dictionary keys for this series
    ds.merge[merge_order][series] = {}
    info = ds.merge[merge_order][series]
    # output series name
    info["output"] = series
    # list of source series to be merged
    info["source"] = ast.literal_eval(cf[section][series]["MergeSeries"]["Source"])
    # create an empty series in ds if the output series doesn't exist yet
    if info["output"] not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, info["output"])
        pfp_utils.CreateSeries(ds, info["output"], data, flag, attr)
def ApplyTurbulenceFilter(cf, ds, ustar_threshold=None):
    """
    Purpose:
     Mask the series listed in the filter options wherever the final
     indicator series is 0.
     The final indicator starts as a copy of the turbulence indicator chosen
     by the "turbulence_filter" option ("ustar", "ustar_evg" or "l"; "l" is
     not implemented and accepts everything).  It is then set to 1 for day
     time records when "accept_day_times" is "yes" and set to 0 outside the
     day and evening when "use_evening_filter" is "yes".
     The turbulence, day, evening, day/evening and final indicators are
     saved in ds.  For every series in the filter list the unfiltered data
     are saved as <series>_nofilter, the filtered data replace the series
     and a copy is saved as <series>_filtered.  Filtered elements get a QC
     flag of 61.  Series whose attributes already contain "turbulence_filter"
     are skipped.
    Usage:
     ApplyTurbulenceFilter(cf, ds, ustar_threshold=None)
     where cf is a control file
           ds is a data structure
           ustar_threshold is passed to pfp_rp.get_ustar_thresholds_annual
             when given, otherwise the thresholds are read from the control
             file by pfp_rp.get_ustar_thresholds
    Author:
    Date:
    """
    opt = ApplyTurbulenceFilter_checks(cf, ds)
    if not opt["OK"]:
        return
    # local pointer to datetime series
    ldt = ds.series["DateTime"]["Data"]
    # time step
    ts = int(ds.globalattributes["time_step"])
    # dictionary of ustar threshold values
    # "is None" because "== None" is evaluated element by element if an
    # array is passed in
    if ustar_threshold is None:
        ustar_dict = pfp_rp.get_ustar_thresholds(cf, ldt)
    else:
        ustar_dict = pfp_rp.get_ustar_thresholds_annual(ldt, ustar_threshold)
    # initialise a dictionary for the indicator series
    indicators = {}
    # get data for the indicator series
    ustar, ustar_flag, ustar_attr = pfp_utils.GetSeriesasMA(ds, "ustar")
    Fsd, f, a = pfp_utils.GetSeriesasMA(ds, "Fsd")
    if "solar_altitude" not in ds.series.keys():
        pfp_ts.get_synthetic_fsd(ds)
    Fsd_syn, f, a = pfp_utils.GetSeriesasMA(ds, "Fsd_syn")
    sa, f, a = pfp_utils.GetSeriesasMA(ds, "solar_altitude")
    # get the day/night indicator series
    # indicators["day"] = 1 ==> day time, indicators["day"] = 0 ==> night time
    indicators["day"] = pfp_rp.get_day_indicator(cf, Fsd, Fsd_syn, sa)
    ind_day = indicators["day"]["values"]
    # get the turbulence indicator series
    filter_name = opt["turbulence_filter"].lower()
    if filter_name == "ustar":
        # indicators["turbulence"] = 1 ==> turbulent, indicators["turbulence"] = 0 ==> not turbulent
        indicators["turbulence"] = pfp_rp.get_turbulence_indicator_ustar(
            ldt, ustar, ustar_dict, ts)
    elif filter_name == "ustar_evg":
        # ustar >= threshold ==> ind_ustar = 1, ustar < threshold == ind_ustar = 0
        indicators["ustar"] = pfp_rp.get_turbulence_indicator_ustar(
            ldt, ustar, ustar_dict, ts)
        ind_ustar = indicators["ustar"]["values"]
        # ustar >= threshold during day AND ustar has been >= threshold since sunset ==> indicators["turbulence"] = 1
        # indicators["turbulence"] = 0 during night once ustar has dropped below threshold even if it
        # increases above the threshold later in the night
        indicators["turbulence"] = pfp_rp.get_turbulence_indicator_ustar_evg(
            ldt, ind_day, ind_ustar, ustar, ustar_dict, ts)
    elif filter_name == "l":
        # not implemented: accept every record.  The indicator must be a
        # dictionary with "values" and "attr" keys like the others because
        # both keys are read further down.
        indicators["turbulence"] = {"values": numpy.ones(len(ldt)), "attr": {}}
        msg = " Use of L as turbulence indicator not implemented, no filter applied"
        logger.warning(msg)
    else:
        msg = " Unrecognised turbulence filter option ("
        msg = msg + opt["turbulence_filter"] + "), no filter applied"
        logger.error(msg)
        return
    # initialise the final indicator series as the turbulence indicator
    # subsequent filters will modify the final indicator series
    # copy.deepcopy() is needed so that the "values" array of the final
    # indicator is independent of the turbulence indicator
    indicators["final"] = copy.deepcopy(indicators["turbulence"])
    # check to see if the user wants to accept all day time observations
    # regardless of ustar value
    if opt["accept_day_times"].lower() == "yes":
        # if yes, then we force the final indicator to be 1
        # if ustar is below the threshold during the day.
        idx = numpy.where(indicators["day"]["values"] == 1)[0]
        # plain int: the numpy.int alias has been removed from NumPy
        indicators["final"]["values"][idx] = 1
        indicators["final"]["attr"].update(indicators["day"]["attr"])
    # get the evening indicator series
    indicators["evening"] = pfp_rp.get_evening_indicator(
        cf, Fsd, Fsd_syn, sa, ts)
    indicators["dayevening"] = {
        "values": indicators["day"]["values"] + indicators["evening"]["values"]
    }
    indicators["dayevening"]["attr"] = indicators["day"]["attr"].copy()
    indicators["dayevening"]["attr"].update(indicators["evening"]["attr"])
    if opt["use_evening_filter"].lower() == "yes":
        idx = numpy.where(indicators["dayevening"]["values"] == 0)[0]
        indicators["final"]["values"][idx] = 0
        indicators["final"]["attr"].update(indicators["dayevening"]["attr"])
    # save the indicator series
    ind_flag = numpy.zeros(len(ldt))
    long_name = "Turbulence indicator, 1 for turbulent, 0 for non-turbulent"
    ind_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name, units="None")
    pfp_utils.CreateSeries(ds, "turbulence_indicator",
                           indicators["turbulence"]["values"], ind_flag, ind_attr)
    long_name = "Day indicator, 1 for day time, 0 for night time"
    ind_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name, units="None")
    pfp_utils.CreateSeries(ds, "day_indicator",
                           indicators["day"]["values"], ind_flag, ind_attr)
    long_name = "Evening indicator, 1 for evening, 0 for not evening"
    ind_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name, units="None")
    pfp_utils.CreateSeries(ds, "evening_indicator",
                           indicators["evening"]["values"], ind_flag, ind_attr)
    long_name = "Day/evening indicator, 1 for day/evening, 0 for not day/evening"
    ind_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name, units="None")
    pfp_utils.CreateSeries(ds, "dayevening_indicator",
                           indicators["dayevening"]["values"], ind_flag, ind_attr)
    long_name = "Final indicator, 1 for use data, 0 for don't use data"
    ind_attr = pfp_utils.MakeAttributeDictionary(long_name=long_name, units="None")
    pfp_utils.CreateSeries(ds, "final_indicator",
                           indicators["final"]["values"], ind_flag, ind_attr)
    # loop over the series to be filtered
    for series in opt["filter_list"]:
        msg = " Applying " + opt["turbulence_filter"] + " filter to " + series
        logger.info(msg)
        # get the data
        data, flag, attr = pfp_utils.GetSeriesasMA(ds, series)
        # continue to next series if this series has been filtered before
        if "turbulence_filter" in attr:
            msg = " Series " + series + " has already been filtered, skipping ..."
            logger.warning(msg)
            continue
        # save the non-filtered data
        pfp_utils.CreateSeries(ds, series + "_nofilter", data, flag, attr)
        # now apply the filter
        data_filtered = numpy.ma.masked_where(
            indicators["final"]["values"] == 0, data, copy=True)
        flag_filtered = numpy.copy(flag)
        idx = numpy.where(indicators["final"]["values"] == 0)[0]
        flag_filtered[idx] = numpy.int32(61)
        # update the series attributes
        for item in indicators["final"]["attr"].keys():
            attr[item] = indicators["final"]["attr"][item]
        # and write the filtered data to the data structure
        pfp_utils.CreateSeries(ds, series, data_filtered, flag_filtered, attr)
        # and write a copy of the filtered data to the data structure so it
        # will still exist once the gap filling has been done
        pfp_utils.CreateSeries(ds, series + "_filtered", data_filtered,
                               flag_filtered, attr)
    return
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
     Mask a series wherever one of its precursor series is not OK.
     The precursors are listed in the "Source" key of the DependencyCheck
     section for the series.  For each precursor found in ds, the dependent
     data are masked and the dependent QC flag is set to code wherever the
     precursor QC flag is not a multiple of 10.  When the "ignore_missing"
     option is yes/y/true/t, precursor elements equal to c.missing_value
     are treated as OK.  The list of precursors is saved in the variable
     attribute "DependencyCheck_source".
     Nothing is done if both section and series are empty or if
     DependencyCheck is not requested for the series; the section is looked
     up from the control file when only section is empty.
    Usage:
     do_dependencycheck(cf, ds, section, series, code=23, mode="quiet")
     where cf is a control file
           ds is a data structure
           section is the control file section containing the series
           series is the label of the dependent series
           code is the QC flag value given to rejected data
           mode is "verbose" to log each check
    Author: PRI
    Date: Back in the day
    """
    if len(section) == 0 and len(series) == 0:
        return
    if len(section) == 0:
        section = pfp_utils.get_cfsection(cf, series=series, mode='quiet')
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        logger.error(" DependencyCheck: keyword Source not found for series "
                     + series + ", skipping ...")
        return
    if mode == "verbose":
        logger.info(" Doing DependencyCheck for " + series)
    # precursor series named in the control file
    source_list = ast.literal_eval(
        cf[section][series]["DependencyCheck"]["Source"])
    # should missing precursor data be ignored?
    opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                       "ignore_missing", default="no")
    ignore_missing = opt.lower() in ["yes", "y", "true", "t"]
    # the series being checked
    dependent_data, dependent_flag, dependent_attr = pfp_utils.GetSeries(
        ds, series)
    for item in source_list:
        # precursors that are not in the data structure are skipped
        if item not in ds.series.keys():
            logger.warning(" DependencyCheck: " + series + " precursor series "
                           + item + " not found, skipping ...")
            continue
        precursor_data, precursor_flag, precursor_attr = pfp_utils.GetSeries(
            ds, item)
        if ignore_missing:
            # zero the flag of missing precursor values so they count as OK
            nRecs = int(ds.globalattributes["nc_nrecs"])
            missing_array = numpy.ones(nRecs) * float(c.missing_value)
            is_missing = numpy.isclose(precursor_data, missing_array)
            idx = numpy.where(is_missing)[0]
            precursor_flag[idx] = numpy.int32(0)
        # precursor is not OK where its flag is not a multiple of 10
        not_ok = numpy.mod(precursor_flag, 10) != 0
        dependent_data = numpy.ma.masked_where(not_ok, dependent_data)
        idx = numpy.ma.where(not_ok)[0]
        dependent_flag[idx] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    pfp_utils.CreateSeries(ds, series, dependent_data, dependent_flag,
                           dependent_attr)
    return
def gfalternate_createdict(cf, ds, series, ds_alt):
    """
    Purpose:
     Creates a dictionary in ds to hold information about the alternate data
     used to gap fill the tower data.
     One entry is added to ds.alternate for every output listed under
     GapFillFromAlternate for the series.  Each entry holds:
      label_tower    - the series label
      source         - the "source" key of the output
      site_name      - the site name from the global attributes
      file_name      - the alternate file, taken from the [Files] key that
                       matches the source (any case) when present, otherwise
                       from the "file_name" key of the output
      fit_type       - the "fit" key of the output, "OLS" when missing or
                       not recognised
      lag            - "yes" or "no" from the "lag" key of the output, "yes"
                       when missing or not recognised
      usevars        - the "usevars" key of the output, only when given
      alternate_name - the "alternate_name" key of the output, defaulting to
                       the series label
      results        - empty lists for the fit statistics
     An empty series is created in ds for any output that does not exist
     yet, together with an empty <series>_composite series.
     Nothing is done if the series is not found in the control file.
    Usage:
     gfalternate_createdict(cf, ds, series, ds_alt)
     where cf is a control file
           ds is a data structure
           series is the label of the series to be gap filled
           ds_alt is a dictionary of alternate data structures keyed by
             file name
    Side effects:
     Alternate files that are not yet in ds_alt are read, matched to the
     start and end times of ds and added to ds_alt.
    Author: PRI
    Date: August 2014
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        # pass the series as a logging argument; building the message with a
        # trailing ", series" made it a tuple
        logger.error("GapFillFromAlternate: Series %s not found in control file, skipping ...", series)
        return
    # create the alternate directory in the data structure
    if "alternate" not in dir(ds):
        ds.alternate = {}
    # name of alternate output series in ds
    output_list = cf[section][series]["GapFillFromAlternate"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        cf_output = cf[section][series]["GapFillFromAlternate"][output]
        # create the dictionary keys for this output
        ds.alternate[output] = {}
        info = ds.alternate[output]
        info["label_tower"] = series
        # source name
        info["source"] = cf_output["source"]
        # site name
        info["site_name"] = ds.globalattributes["site_name"]
        # alternate data file name
        # first, look in the [Files] section for a generic file name
        # materialise the keys as a list: they are indexed by position below
        # and a keys() view (plain dict on Python 3) does not support that
        file_list = list(cf["Files"].keys())
        lower_file_list = [item.lower() for item in file_list]
        source_lower = info["source"].lower()
        if source_lower in lower_file_list:
            # found a generic file name
            i = lower_file_list.index(source_lower)
            info["file_name"] = cf["Files"][file_list[i]]
        else:
            # no generic file name found, look for a file name in the variable section
            info["file_name"] = cf_output["file_name"]
        # if the file has not already been read, do it now
        if info["file_name"] not in ds_alt:
            ds_alternate = pfp_io.nc_read_series(info["file_name"], fixtimestepmethod="round")
            gfalternate_matchstartendtimes(ds, ds_alternate)
            ds_alt[info["file_name"]] = ds_alternate
        # get the type of fit
        info["fit_type"] = "OLS"
        if "fit" in cf_output:
            if cf_output["fit"].lower() in ["ols", "ols_thru0", "mrev", "replace", "rma", "odr"]:
                info["fit_type"] = cf_output["fit"]
            else:
                logger.info("gfAlternate: unrecognised fit option for series %s, used OLS", output)
        # correct for lag?
        # start from the default so the key always exists, even when the
        # option in the control file is not recognised
        info["lag"] = "yes"
        if "lag" in cf_output:
            lag_option = cf_output["lag"].lower()
            if lag_option in ["no", "false"]:
                info["lag"] = "no"
            elif lag_option in ["yes", "true"]:
                info["lag"] = "yes"
            else:
                logger.info("gfAlternate: unrecognised lag option for series %s", output)
        # choose specific alternate variable?
        if "usevars" in cf_output:
            info["usevars"] = ast.literal_eval(cf_output["usevars"])
        # alternate data variable name if different from name used in control file
        if "alternate_name" in cf_output:
            info["alternate_name"] = cf_output["alternate_name"]
        else:
            info["alternate_name"] = series
        # results of best fit for plotting later on
        info["results"] = {"startdate": [], "enddate": [], "No. points": [], "No. filled": [],
                           "r": [], "Bias": [], "RMSE": [], "Frac Bias": [], "NMSE": [],
                           "Avg (Tower)": [], "Avg (Alt)": [],
                           "Var (Tower)": [], "Var (Alt)": [], "Var ratio": []}
        # create an empty series in ds if the alternate output series doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
            pfp_utils.CreateSeries(ds, series + "_composite", data, flag, attr)
def gfalternate_matchstartendtimes(ds,ds_alternate):
    """
    Purpose:
     Match the start and end times of the alternate and tower data.
     The logic is as follows:
      - if there is no overlap between the alternate and tower data then
        dummy series with missing data are created for the alternate data
        for the period of the tower data
      - if the alternate and tower data overlap then truncate or pad (with
        missing values) the alternate data series so that the periods of the
        tower data and alternate data match.
    Usage:
     gfalternate_matchstartendtimes(ds,ds_alternate)
     where ds is the data structure containing the tower data
           ds_alternate is the data structure containing the alternate data
    Side effects:
     - ds_alternate is modified in place; its "DateTime" series is replaced
       by the tower "DateTime" series, every other series (except
       "DateTime_UTC" and the "_QCFlag" entries) is rewritten at the tower
       length and the "nc_nrecs" global attribute is updated
     - ds.returncodes["GapFillFromAlternate"] is set to "error" when the
       time steps differ (ds_alternate is then left untouched) and to
       "normal" otherwise
     - sys.exit() is called if the matched indices do not point to the
       same datetimes in the tower and alternate data
    Author: PRI
    Date: July 2015
    """
    # check the time steps are the same
    ts_tower = int(ds.globalattributes["time_step"])
    ts_alternate = int(ds_alternate.globalattributes["time_step"])
    if ts_tower != ts_alternate:
        msg = " GapFillFromAlternate: time step for tower and alternate data are different, returning ..."
        logger.error(msg)
        ds.returncodes["GapFillFromAlternate"] = "error"
        return
    # get the start and end times of the tower and the alternate data
    ldt_alternate = ds_alternate.series["DateTime"]["Data"]
    start_alternate = ldt_alternate[0]
    end_alternate = ldt_alternate[-1]
    ldt_tower = ds.series["DateTime"]["Data"]
    start_tower = ldt_tower[0]
    end_tower = ldt_tower[-1]
    # two periods overlap only if each one starts before the other one ends;
    # checking the start of the alternate data alone would treat alternate
    # data that finishes before the tower data begins as overlapping
    overlap = (start_alternate <= end_tower) and (end_alternate >= start_tower)
    # do the alternate and tower data overlap?
    if overlap:
        # indices of the datetimes that are common to both data sets
        tower_index, alternate_index = pfp_utils.FindMatchingIndices(ldt_tower, ldt_alternate)
        # check that the indices point to the same times
        ldta = [ldt_alternate[i] for i in alternate_index]
        ldtt = [ldt_tower[i] for i in tower_index]
        if ldta != ldtt:
            # and exit with a helpful message if they dont
            msg = " Something went badly wrong and I'm giving up"
            logger.error(msg)
            sys.exit()
        # get a list of alternate series
        alternate_series_list = [item for item in ds_alternate.series.keys() if "_QCFlag" not in item]
        # number of records in truncated or padded alternate data
        nRecs_tower = len(ldt_tower)
        # force the alternate datetime to be the tower datetime
        ds_alternate.series["DateTime"] = ds.series["DateTime"]
        # loop over the alternate series and truncate or pad as required
        # truncation or padding is handled by the indices
        for series in alternate_series_list:
            if series in ["DateTime","DateTime_UTC"]:
                continue
            # get the alternate data
            data,flag,attr = pfp_utils.GetSeriesasMA(ds_alternate,series)
            # create an array of missing data of the required length
            data_overlap = numpy.full(nRecs_tower,c.missing_value,dtype=numpy.float64)
            flag_overlap = numpy.ones(nRecs_tower,dtype=numpy.int32)
            # replace missing data with alternate data where times match
            data_overlap[tower_index] = data[alternate_index]
            flag_overlap[tower_index] = flag[alternate_index]
            # write the truncated or padded series back into the alternate data structure
            pfp_utils.CreateSeries(ds_alternate,series,data_overlap,flag_overlap,attr)
        # update the number of records in the file
        ds_alternate.globalattributes["nc_nrecs"] = nRecs_tower
    else:
        # there is no overlap between the alternate and tower data, create dummy series
        nRecs = len(ldt_tower)
        ds_alternate.globalattributes["nc_nrecs"] = nRecs
        ds_alternate.series["DateTime"] = ds.series["DateTime"]
        alternate_series_list = [item for item in ds_alternate.series.keys() if "_QCFlag" not in item]
        for series in alternate_series_list:
            if series in ["DateTime","DateTime_UTC"]:
                continue
            # only the attributes of the existing series are kept
            _, _, attr = pfp_utils.GetSeriesasMA(ds_alternate, series)
            data = numpy.full(nRecs, c.missing_value, dtype=numpy.float64)
            flag = numpy.ones(nRecs, dtype=numpy.int32)
            pfp_utils.CreateSeries(ds_alternate, series, data, flag, attr)
    ds.returncodes["GapFillFromAlternate"] = "normal"
int_fn = scipy.interpolate.Akima1DInterpolator(era5_time_1hr, coef_1hr) # get the coefficient at the tower time step coef_tts = int_fn(era5_time_tts) # ==== old = UnivariateSpline ==== # get the spline interpolation function #s = InterpolatedUnivariateSpline(era5_time_1hr, coef_1hr, k=1) # get the coefficient at the tower time step #coef_tts = s(era5_time_tts) # get the downwelling solar radiation at the tower time step Fsd_era5_tts = coef_tts * numpy.sin(numpy.deg2rad(alt_solar_tts)) flag = numpy.zeros(len(Fsd_era5_tts), dtype=numpy.int32) attr = pfp_utils.MakeAttributeDictionary( long_name="Downwelling short wave radiation", units="W/m2") pfp_utils.CreateSeries(ds_era5, "Fsd", Fsd_era5_tts, flag, attr) # === NET-SHORTWAVE Fn_sw === # # Interpolate the 1 hourly accumulated net shortwave to the tower time step # NOTE: ERA-5 variables are dimensioned [time,latitude,longitude] Fn_sw_3d = era5_file.variables["ssr"][:, :, :] Fn_sw_accum = Fn_sw_3d[:, site_lat_index, site_lon_index] # Net shortwave in ERA-5 is a cummulative value that is reset to 0 at 0300 and 1500 UTC. # Here we convert the cummulative values to 3 hourly values. #Fn_sw_era5_1hr = numpy.ediff1d(Fn_sw_accum,to_begin=0) # deal with the reset times at 0300 and 1500 #idx = numpy.where((hour_utc==3)|(hour_utc==15))[0] #Fn_sw_era5_1hr[idx] = Fn_sw_accum[idx] # get the average value over the 1 hourly period #Fn_sw_era5_1hr = Fn_sw_era5_1hr/(era5_timestep*60) Fn_sw_era5_1hr = Fn_sw_accum / (era5_timestep * 60)
# update global attributes ds.globalattributes["nc_nrecs"] = len(dt_loc) ds.globalattributes["start_datetime"] = str(dt_loc[0]) ds.globalattributes["end_datetime"] = str(dt_loc[-1]) # put the QC'd, smoothed and interpolated EVI into the data structure flag = numpy.zeros(len(dt_loc), dtype=numpy.int32) attr = pfp_utils.MakeAttributeDictionary( long_name="MODIS EVI, smoothed and interpolated", units="none", horiz_resolution="250m", cutout_size=str(3), evi_min=str(evi_min), evi_max=str(evi_max), sg_num_points=str(sgnp), sg_order=str(sgo)) pfp_utils.CreateSeries(ds, "EVI", evi_ts["smoothed"], flag, attr) attr = pfp_utils.MakeAttributeDictionary(long_name="MODIS EVI, interpolated", units="none", horiz_resolution="250m", cutout_size=str(3), evi_min=str(evi_min), evi_max=str(evi_max)) pfp_utils.CreateSeries(ds, "EVI_notsmoothed", evi_ts["mean"], flag, attr) # now write the data structure to a netCDF file out_name = os.path.join(cf["Files"]["base_path"], site, "Data", "MODIS", site + "_EVI.nc") out_file = pfp_io.nc_open_write(out_name) pfp_io.nc_write_series(out_file, ds, ndims=1) print "modis_evi2nc: finished"
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Build the dictionary that holds the information needed to estimate
     ecosystem respiration with the Lloyd-Taylor method and register the
     series in the standard merge dictionary of the data structure.
    Usage:
     rpLT_info = rpLT_createdict(cf, ds, series)
     where cf is the control file object
           ds is the data structure
           series is the label of the series in the control file
     The information dictionary is returned.  None is returned when the
     series is not found in the control file or when one of the drivers
     contains masked values.
    Side effects:
     - an empty series is created in ds for the output label and for the
       merge output (the series label) when they do not exist yet
     - ds.merge["standard"][series] is (re)created
    Author: PRI
    Date: October 2015
    """
    # find the control file section that holds this series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # nothing to do if the series is not in the control file
        msg = "ERUsingLloydTaylor: Series " + series
        msg = msg + " not found in control file, skipping ..."
        logger.error(msg)
        return
    # the Lloyd-Taylor options for this series
    lt_options = cf[section][series]["ERUsingLloydTaylor"]
    driver_list = ast.literal_eval(lt_options["drivers"])
    target = lt_options["target"]
    # give up on this target if any of the drivers has missing data
    for label in driver_list:
        driver_data, _, _ = pfp_utils.GetSeriesasMA(ds, label)
        if numpy.ma.count_masked(driver_data) != 0:
            msg = "ERUsingLloydTaylor: driver " + label
            msg = msg + " contains missing data, skipping target " + target
            logger.error(msg)
            return
    # names of the statistics stored for plotting later on, each one
    # starts as an empty list
    result_labels = ["startdate", "enddate", "No. points", "r", "Bias",
                     "RMSE", "Frac Bias", "NMSE", "Avg (obs)", "Avg (LT)",
                     "Var (obs)", "Var (LT)", "Var ratio", "m_ols", "b_ols"]
    # assemble the information dictionary for this series
    rpLT_info = {
        # site name
        "site_name": ds.globalattributes["site_name"],
        # source series for ER, "Fc" unless the control file says otherwise
        "source": pfp_utils.get_keyvaluefromcf(
            cf, [section, series, "ERUsingLloydTaylor"], "source",
            default="Fc"),
        # target series name
        "target": target,
        # list of drivers
        "drivers": driver_list,
        # name of the output series in ds
        "output": lt_options["output"],
        # results of best fit
        "results": {label: [] for label in result_labels},
    }
    # the configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # make sure the output series exists in ds
    output_label = rpLT_info["output"]
    if output_label not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, output_label)
        pfp_utils.CreateSeries(ds, output_label, data, flag, attr)
    # make sure the merge dictionary and its "standard" entry exist
    if "merge" not in dir(ds):
        ds.merge = {}
    standard_merge = ds.merge.setdefault("standard", {})
    # start a fresh merge entry for this series, the entry is attached
    # to ds before it is filled in
    merge_entry = {}
    standard_merge[series] = merge_entry
    # the merged output carries the name of the series itself
    merge_entry["output"] = series
    # list of series to be merged
    merge_entry["source"] = ast.literal_eval(
        cf[section][series]["MergeSeries"]["Source"])
    # make sure the merge output series exists in ds
    if merge_entry["output"] not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, merge_entry["output"])
        pfp_utils.CreateSeries(ds, merge_entry["output"], data, flag, attr)
    return rpLT_info