def GapFillFluxUsingMDS(cf, ds, series=""):
    """Placeholder for flux gap filling with MDS; logs a notice and returns."""
    # Find the control file section that holds this series; bail out quietly
    # if the series is not in the control file at all.
    cf_section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if not len(cf_section):
        return
    # The MDS method is not implemented yet, just tell the user and leave.
    if "GapFillFluxUsingMDS" in cf[cf_section][series]:
        logger.info(" GapFillFluxUsingMDS: not implemented yet")
        return
def gfSOLO_createdict(cf,ds,series):
    """
    Creates a dictionary in ds to hold information about the SOLO data
    used to gap fill the tower data.

    One entry is created in ds.solo for each output listed under the
    [GapFillUsingSOLO] key of the series' control file section.  Each entry
    records the tower label, site name, optional SOLO neural network
    settings, the driver list, turbulence/daynight filter options and an
    empty results dictionary used for plotting later on.
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf,series=series,mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section)==0:
        msg = "GapFillUsingSOLO: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # create the solo directory in the data structure
    if "solo" not in dir(ds):
        ds.solo = {}
    # names of the SOLO output series in ds
    output_list = cf[section][series]["GapFillUsingSOLO"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.solo[output] = {}
        # get the target; defaults to the series name itself
        if "target" in cf[section][series]["GapFillUsingSOLO"][output]:
            ds.solo[output]["label_tower"] = cf[section][series]["GapFillUsingSOLO"][output]["target"]
        else:
            ds.solo[output]["label_tower"] = series
        # site name
        ds.solo[output]["site_name"] = ds.globalattributes["site_name"]
        # list of SOLO settings, in order: nodes, training, factor,
        # learning rate, iterations (only stored if given in the control file)
        if "solo_settings" in cf[section][series]["GapFillUsingSOLO"][output]:
            ss_list = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"][output]["solo_settings"])
            ds.solo[output]["solo_settings"] = {}
            ds.solo[output]["solo_settings"]["nodes_target"] = int(ss_list[0])
            ds.solo[output]["solo_settings"]["training"] = int(ss_list[1])
            ds.solo[output]["solo_settings"]["factor"] = int(ss_list[2])
            ds.solo[output]["solo_settings"]["learningrate"] = float(ss_list[3])
            ds.solo[output]["solo_settings"]["iterations"] = int(ss_list[4])
        # list of drivers
        ds.solo[output]["drivers"] = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"][output]["drivers"])
        # apply ustar (turbulence) filter, empty string means no filter
        opt = pfp_utils.get_keyvaluefromcf(cf,[section,series,"GapFillUsingSOLO",output],
                                           "turbulence_filter",default="")
        ds.solo[output]["turbulence_filter"] = opt
        # apply day/night filter, empty string means no filter
        opt = pfp_utils.get_keyvaluefromcf(cf,[section,series,"GapFillUsingSOLO",output],
                                           "daynight_filter",default="")
        ds.solo[output]["daynight_filter"] = opt
        # results of best fit for plotting later on
        ds.solo[output]["results"] = {"startdate":[],"enddate":[],"No. points":[],"r":[],
                                      "Bias":[],"RMSE":[],"Frac Bias":[],"NMSE":[],
                                      "Avg (obs)":[],"Avg (SOLO)":[],
                                      "Var (obs)":[],"Var (SOLO)":[],"Var ratio":[],
                                      "m_ols":[],"b_ols":[]}
        # create an empty series in ds if the SOLO output series doesn't exist yet
        if output not in ds.series.keys():
            data,flag,attr = pfp_utils.MakeEmptySeries(ds,output)
            pfp_utils.CreateSeries(ds,output,data,flag,attr)
def gfClimatology_createdict(cf, ds, series):
    """
    Creates a dictionary in ds to hold information about the climatological
    data used to gap fill the tower data.
    """
    # locate the control file section for this series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # series is not in the control file, log it and do nothing
        logger.error("GapFillFromClimatology: "+series+" not found in control file, skipping ...")
        return
    # make sure the climatology directory exists in the data structure
    if "climatology" not in dir(ds):
        ds.climatology = {}
    # one entry per output listed under [GapFillFromClimatology]
    for output in cf[section][series]["GapFillFromClimatology"].keys():
        # shorthand for this output's control file sub-section
        cf_out = cf[section][series]["GapFillFromClimatology"][output]
        # build the entry in place so ds.climatology always holds it
        ds.climatology[output] = entry = {}
        entry["label_tower"] = series
        # site name
        entry["site_name"] = ds.globalattributes["site_name"]
        # climatology file name: a generic "climatology" entry in the
        # [Files] section (case-insensitive) takes precedence over a
        # file name given in the variable's own sub-section
        file_list = cf["Files"].keys()
        lower_file_list = [item.lower() for item in file_list]
        if "climatology" in lower_file_list:
            i = lower_file_list.index("climatology")
            entry["file_name"] = cf["Files"][file_list[i]]
        else:
            entry["file_name"] = cf_out["file_name"]
        # climatology variable name, defaults to the control file name
        if "climatology_name" in cf_out:
            entry["climatology_name"] = cf_out["climatology_name"]
        else:
            entry["climatology_name"] = series
        # gap filling method, defaults to "interpolated_daily"
        if "method" not in cf_out.keys():
            entry["method"] = "interpolated_daily"
        else:
            entry["method"] = cf_out["method"]
        # create an empty series in ds if the output doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
def UpdateVariableAttributes_QC(cf, variable):
    """
    Purpose:
     Copy the RangeCheck "Lower" and "Upper" limits from the control file
     into the variable's attribute dictionary so they travel with the data.
    Usage:
     UpdateVariableAttributes_QC(cf, variable)
    Side effects:
     May add "rangecheck_lower" and/or "rangecheck_upper" to variable["Attr"].
    Author: PRI
    Date: November 2016
    """
    label = variable["Label"]
    section = pfp_utils.get_cfsection(cf, series=label, mode='quiet')
    # FIX: guard against an empty section (label not in the control file),
    # consistent with the other routines in this module; previously this
    # fell through to cf[section] and raised a KeyError
    if len(section) == 0:
        return
    if label not in cf[section]:
        return
    if "RangeCheck" not in cf[section][label]:
        return
    # copy the limits across when they are present
    if "Lower" in cf[section][label]["RangeCheck"]:
        variable["Attr"]["rangecheck_lower"] = cf[section][label][
            "RangeCheck"]["Lower"]
    if "Upper" in cf[section][label]["RangeCheck"]:
        variable["Attr"]["rangecheck_upper"] = cf[section][label][
            "RangeCheck"]["Upper"]
    return
def GapFillParseControlFile(cf, ds, series, ds_alt):
    """
    Parse the control file entries for one series and build the dictionaries
    needed by each gap filling (or merge) method requested for it.
    """
    # find the control file section containing this series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # series not in the control file, nothing to set up
        return
    series_opts = cf[section][series].keys()
    # alternate data (ACCESS, AWS, ...) gap filling
    if "GapFillFromAlternate" in series_opts:
        gfalternate_createdict(cf, ds, series, ds_alt)
    # SOLO neural network gap filling
    if "GapFillUsingSOLO" in series_opts:
        gfSOLO_createdict(cf, ds, series)
    # marginal distribution sampling gap filling
    if "GapFillUsingMDS" in series_opts:
        gfMDS_createdict(cf, ds, series)
    # climatology gap filling
    if "GapFillFromClimatology" in series_opts:
        gfClimatology_createdict(cf, ds, series)
    # merging of gap filled and tower series
    if "MergeSeries" in series_opts:
        gfMergeSeries_createdict(cf, ds, series)
def do_qcchecks_oneseries(cf, ds, section, series):
    """Apply the full suite of QC checks to a single series in ds."""
    # resolve the control file section if the caller did not supply one
    if len(section) == 0:
        section = pfp_utils.get_cfsection(cf, series=series, mode='quiet')
        if len(section) == 0:
            # series is not in the control file, nothing to check
            return
    # the QC checks in application order, paired with their QC flag codes
    checks = [(do_rangecheck, 2),
              (do_lowercheck, 2),
              (do_uppercheck, 2),
              (do_diurnalcheck, 5),
              (do_EPQCFlagCheck, 9),
              (do_excludedates, 6),
              (do_excludehours, 7)]
    for check, flag_code in checks:
        check(cf, ds, section, series, code=flag_code)
    # wind direction corrections take no QC flag code
    do_winddirectioncorrection(cf, ds, section, series)
    # record in the global attributes that the QC checks were applied
    if 'do_qcchecks' not in ds.globalattributes['Functions']:
        ds.globalattributes['Functions'] += ',do_qcchecks'
def gfMergeSeries_createdict(cf,ds,series):
    """
    Creates a dictionary in ds to hold information about the merging of gap
    filled and tower data.

    Series listed in merge_prereq_list are merged first so they can be used
    with existing tower data to re-calculate Fg, Fn and Fa.
    """
    merge_prereq_list = ["Fsd","Fsu","Fld","Flu","Ts","Sws"]
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf,series=series,mode="quiet")
    # FIX: return without doing anything if the series isn't in a control
    # file section, consistent with the other gf*_createdict routines;
    # previously an empty section caused a KeyError on cf[section] below
    if len(section)==0:
        msg = "MergeSeries: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # create the merge directory in the data structure
    if "merge" not in dir(ds):
        ds.merge = {}
    # check to see if this series is in the "merge first" list
    # series in the "merge first" list get merged first so they can be used with existing tower
    # data to re-calculate Fg, Fn and Fa
    merge_order = "standard"
    if series in merge_prereq_list:
        merge_order = "prerequisite"
    if merge_order not in ds.merge.keys():
        ds.merge[merge_order] = {}
    # create the dictionary keys for this series
    ds.merge[merge_order][series] = {}
    # output series name
    ds.merge[merge_order][series]["output"] = series
    # list of source series to merge, in priority order
    ds.merge[merge_order][series]["source"] = ast.literal_eval(cf[section][series]["MergeSeries"]["Source"])
    # create an empty series in ds if the output series doesn't exist yet
    if ds.merge[merge_order][series]["output"] not in ds.series.keys():
        data,flag,attr = pfp_utils.MakeEmptySeries(ds,ds.merge[merge_order][series]["output"])
        pfp_utils.CreateSeries(ds,ds.merge[merge_order][series]["output"],data,flag,attr)
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
     Flag the dependent series wherever any of its precursor ("Source")
     series has a QC flag that does not end in 0 (i.e. data not OK).
     If "ignore_missing" is set in the control file, precursor records that
     are equal to the missing value are treated as OK.
    Usage:
     do_dependencycheck(cf, ds, section, series, code=23, mode="quiet")
    Author: PRI
    Date: Back in the day
    """
    if len(section) == 0 and len(series) == 0:
        return
    if len(section) == 0:
        section = pfp_utils.get_cfsection(cf, series=series, mode='quiet')
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series " + series + ", skipping ..."
        logger.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for " + series
        logger.info(msg)
    # get the precursor source list from the control file
    source_list = ast.literal_eval(
        cf[section][series]["DependencyCheck"]["Source"])
    # check to see if the "ignore_missing" flag is set
    opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                       "ignore_missing", default="no")
    ignore_missing = opt.lower() in ["yes", "y", "true", "t"]
    # get the data
    dependent_data, dependent_flag, dependent_attr = pfp_utils.GetSeries(
        ds, series)
    # loop over the precursor source list
    for item in source_list:
        # check the precursor is in the data structure
        if item not in ds.series.keys():
            msg = " DependencyCheck: " + series + " precursor series " + item + " not found, skipping ..."
            logger.warning(msg)
            continue
        # get the precursor data
        precursor_data, precursor_flag, precursor_attr = pfp_utils.GetSeries(
            ds, item)
        # check if the user wants to ignore missing precursor data
        if ignore_missing:
            # find the elements equal to the missing value
            # (FIX: compare against the scalar directly instead of building a
            # full-length missing_array, and drop the redundant "== True")
            idx = numpy.where(numpy.isclose(precursor_data,
                                            float(c.missing_value)))[0]
            # and set these flags to 0 so missing data is ignored
            precursor_flag[idx] = numpy.int32(0)
        # records where the precursor flag shows data not OK
        # (computed once instead of twice as before)
        not_ok = numpy.mod(precursor_flag, 10) != 0
        # mask the dependent data where the precursor data is not OK
        dependent_data = numpy.ma.masked_where(not_ok, dependent_data)
        # and set the dependent QC flag at those records
        idx = numpy.ma.where(not_ok)[0]
        dependent_flag[idx] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    pfp_utils.CreateSeries(ds, series, dependent_data, dependent_flag,
                           dependent_attr)
    # our work here is done
    return
def gfalternate_createdict(cf, ds, series, ds_alt):
    """
    Purpose:
     Creates a dictionary in ds to hold information about the alternate data
     used to gap fill the tower data.
    Usage:
     gfalternate_createdict(cf, ds, series, ds_alt)
    Side effects:
     Populates ds.alternate, may read alternate netCDF files into ds_alt and
     may create empty output series in ds.
    Author: PRI
    Date: August 2014
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    # FIX: the original assigned a (format, args) tuple to msg instead of a
    # formatted string; use the logger's lazy %-style arguments instead
    if len(section)==0:
        logger.error("GapFillFromAlternate: Series %s not found in control file, skipping ...", series)
        return
    # create the alternate directory in the data structure
    if "alternate" not in dir(ds):
        ds.alternate = {}
    # name of alternate output series in ds
    output_list = cf[section][series]["GapFillFromAlternate"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this output
        ds.alternate[output] = {}
        ds.alternate[output]["label_tower"] = series
        # source name
        ds.alternate[output]["source"] = cf[section][series]["GapFillFromAlternate"][output]["source"]
        # site name
        ds.alternate[output]["site_name"] = ds.globalattributes["site_name"]
        # alternate data file name
        # first, look in the [Files] section for a generic file name
        file_list = cf["Files"].keys()
        lower_file_list = [item.lower() for item in file_list]
        if ds.alternate[output]["source"].lower() in lower_file_list:
            # found a generic file name
            i = lower_file_list.index(ds.alternate[output]["source"].lower())
            ds.alternate[output]["file_name"] = cf["Files"][file_list[i]]
        else:
            # no generic file name found, look for a file name in the variable section
            ds.alternate[output]["file_name"] = cf[section][series]["GapFillFromAlternate"][output]["file_name"]
        # if the file has not already been read, do it now
        if ds.alternate[output]["file_name"] not in ds_alt:
            ds_alternate = pfp_io.nc_read_series(ds.alternate[output]["file_name"],fixtimestepmethod="round")
            gfalternate_matchstartendtimes(ds,ds_alternate)
            ds_alt[ds.alternate[output]["file_name"]] = ds_alternate
        # get the type of fit
        ds.alternate[output]["fit_type"] = "OLS"
        if "fit" in cf[section][series]["GapFillFromAlternate"][output]:
            if cf[section][series]["GapFillFromAlternate"][output]["fit"].lower() in ["ols","ols_thru0","mrev","replace","rma","odr"]:
                ds.alternate[output]["fit_type"] = cf[section][series]["GapFillFromAlternate"][output]["fit"]
            else:
                logger.info("gfAlternate: unrecognised fit option for series %s, used OLS", output)
        # correct for lag?
        if "lag" in cf[section][series]["GapFillFromAlternate"][output]:
            if cf[section][series]["GapFillFromAlternate"][output]["lag"].lower() in ["no","false"]:
                ds.alternate[output]["lag"] = "no"
            elif cf[section][series]["GapFillFromAlternate"][output]["lag"].lower() in ["yes","true"]:
                ds.alternate[output]["lag"] = "yes"
            else:
                logger.info("gfAlternate: unrecognised lag option for series %s", output)
                # FIX: fall back to the default, previously "lag" was left
                # unset here which caused a KeyError downstream
                ds.alternate[output]["lag"] = "yes"
        else:
            ds.alternate[output]["lag"] = "yes"
        # choose specific alternate variable?
        if "usevars" in cf[section][series]["GapFillFromAlternate"][output]:
            ds.alternate[output]["usevars"] = ast.literal_eval(cf[section][series]["GapFillFromAlternate"][output]["usevars"])
        # alternate data variable name if different from name used in control file
        if "alternate_name" in cf[section][series]["GapFillFromAlternate"][output]:
            ds.alternate[output]["alternate_name"] = cf[section][series]["GapFillFromAlternate"][output]["alternate_name"]
        else:
            ds.alternate[output]["alternate_name"] = series
        # results of best fit for plotting later on
        ds.alternate[output]["results"] = {"startdate":[],"enddate":[],"No. points":[],"No. filled":[],
                                           "r":[],"Bias":[],"RMSE":[],"Frac Bias":[],"NMSE":[],
                                           "Avg (Tower)":[],"Avg (Alt)":[],
                                           "Var (Tower)":[],"Var (Alt)":[],"Var ratio":[]}
        # create an empty series in ds if the alternate output series doesn't exist yet
        if output not in ds.series.keys():
            data,flag,attr = pfp_utils.MakeEmptySeries(ds,output)
            pfp_utils.CreateSeries(ds,output,data,flag,attr)
            pfp_utils.CreateSeries(ds,series+"_composite",data,flag,attr)
def gfMDS_createdict(cf, ds, series):
    """
    Purpose:
     Create an information dictionary for MDS gap filling from the contents
     of the control file.
    Usage:
     info["MDS"] = gfMDS_createdict(cf)
    Author: PRI
    Date: May 2018
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section)==0:
        msg = "GapFillUsingMDS: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # create the MDS attribute (a dictionary) in ds, this will hold all MDS settings
    if "mds" not in dir(ds):
        ds.mds = {}
    # name of MDS output series in ds
    output_list = cf[section][series]["GapFillUsingMDS"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.mds[output] = {}
        # get the target
        if "target" in cf[section][series]["GapFillUsingMDS"][output]:
            ds.mds[output]["target"] = cf[section][series]["GapFillUsingMDS"][output]["target"]
        else:
            ds.mds[output]["target"] = series
        # site name
        ds.mds[output]["site_name"] = ds.globalattributes["site_name"]
        # list of MDS settings
        # NOTE(review): mds_settings is parsed but never stored or used -
        # presumably intended to populate ds.mds[output], confirm and finish
        if "mds_settings" in cf[section][series]["GapFillUsingMDS"][output]:
            mdss_list = ast.literal_eval(cf[section][series]["GapFillUsingMDS"][output]["mds_settings"])
        # list of drivers
        ds.mds[output]["drivers"] = ast.literal_eval(cf[section][series]["GapFillUsingMDS"][output]["drivers"])
        # list of tolerances
        ds.mds[output]["tolerances"] = ast.literal_eval(cf[section][series]["GapFillUsingMDS"][output]["tolerances"])
        # get the ustar filter option
        opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "GapFillUsingMDS", output],
                                           "turbulence_filter", default="")
        ds.mds[output]["turbulence_filter"] = opt
        # get the day/night filter option
        opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "GapFillUsingMDS", output],
                                           "daynight_filter", default="")
        ds.mds[output]["daynight_filter"] = opt
    # check that all requested targets and drivers have a mapping to
    # a FluxNet label, remove if they don't
    fluxnet_label_map = {"Fc":"NEE", "Fe":"LE", "Fh":"H",
                         "Fsd":"SW_IN", "Ta":"TA", "VPD":"VPD"}
    # FIX: iterate over a snapshot of the keys; deleting entries while
    # iterating the dict itself raises "dictionary changed size during
    # iteration"
    for mds_label in list(ds.mds.keys()):
        ds.mds[mds_label]["mds_label"] = mds_label
        pfp_target = ds.mds[mds_label]["target"]
        if pfp_target not in fluxnet_label_map:
            msg = " Target ("+pfp_target+") not supported for MDS gap filling"
            logger.warning(msg)
            del ds.mds[mds_label]
            continue
        ds.mds[mds_label]["target_mds"] = fluxnet_label_map[pfp_target]
        # FIX: iterate over a copy of the drivers list; removing items from
        # the list being iterated skips the element after each removal
        for pfp_driver in list(ds.mds[mds_label]["drivers"]):
            if pfp_driver not in fluxnet_label_map:
                msg = "Driver ("+pfp_driver+") not supported for MDS gap filling"
                logger.warning(msg)
                ds.mds[mds_label]["drivers"].remove(pfp_driver)
            else:
                if "drivers_mds" not in ds.mds[mds_label]:
                    ds.mds[mds_label]["drivers_mds"] = []
                ds.mds[mds_label]["drivers_mds"].append(fluxnet_label_map[pfp_driver])
        # drop the entry entirely if no usable drivers remain
        if len(ds.mds[mds_label]["drivers"]) == 0:
            del ds.mds[mds_label]
    return
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Creates a dictionary in ds to hold information about estimating
     ecosystem respiration using the Lloyd-Taylor method.
    Usage:
     rpLT_info = rpLT_createdict(cf, ds, series)
    Author: PRI
    Date October 2015
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error("ERUsingLloydTaylor: Series " + series + " not found in control file, skipping ...")
        return
    # check that none of the drivers have missing data; Lloyd-Taylor needs
    # complete driver series, so bail out if any driver has masked records
    driver_list = ast.literal_eval(
        cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    target = cf[section][series]["ERUsingLloydTaylor"]["target"]
    for label in driver_list:
        data, flag, attr = pfp_utils.GetSeriesasMA(ds, label)
        if numpy.ma.count_masked(data) != 0:
            logger.error("ERUsingLloydTaylor: driver " + label + " contains missing data, skipping target " + target)
            return
    # create the dictionary keys for this series
    rpLT_info = {}
    # site name
    rpLT_info["site_name"] = ds.globalattributes["site_name"]
    # source series for ER, defaults to Fc
    opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "ERUsingLloydTaylor"],
                                       "source", default="Fc")
    rpLT_info["source"] = opt
    # target series name
    rpLT_info["target"] = cf[section][series]["ERUsingLloydTaylor"]["target"]
    # list of drivers
    rpLT_info["drivers"] = ast.literal_eval(
        cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    # name of the output series in ds
    rpLT_info["output"] = cf[section][series]["ERUsingLloydTaylor"]["output"]
    # results of best fit for plotting later on
    rpLT_info["results"] = {"startdate": [], "enddate": [], "No. points": [], "r": [],
                            "Bias": [], "RMSE": [], "Frac Bias": [], "NMSE": [],
                            "Avg (obs)": [], "Avg (LT)": [],
                            "Var (obs)": [], "Var (LT)": [], "Var ratio": [],
                            "m_ols": [], "b_ols": []}
    # create the configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # create an empty series in ds if the output series doesn't exist yet
    if rpLT_info["output"] not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, rpLT_info["output"])
        pfp_utils.CreateSeries(ds, rpLT_info["output"], data, flag, attr)
    # create the merge directory in the data structure
    if "merge" not in dir(ds):
        ds.merge = {}
    if "standard" not in ds.merge.keys():
        ds.merge["standard"] = {}
    # create the dictionary keys for this series
    ds.merge["standard"][series] = {}
    # output series name
    ds.merge["standard"][series]["output"] = series
    # list of source series to merge
    # NOTE(review): assumes a [MergeSeries] entry with a "Source" key exists
    # for this series - a KeyError is raised otherwise, confirm with callers
    ds.merge["standard"][series]["source"] = ast.literal_eval(
        cf[section][series]["MergeSeries"]["Source"])
    # create an empty series in ds if the output series doesn't exist yet
    if ds.merge["standard"][series]["output"] not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(
            ds, ds.merge["standard"][series]["output"])
        pfp_utils.CreateSeries(ds, ds.merge["standard"][series]["output"],
                               data, flag, attr)
    return rpLT_info