def gfSOLO_createdict(cf,ds,series):
    """
    Populate ds.solo with one entry per SOLO output configured for a series.

    The outputs are the keys of the "GapFillUsingSOLO" subsection of the
    control file entry for "series".  For every output a dictionary is stored
    in ds.solo[output] holding the tower label, site name, optional SOLO
    settings, driver list, filter options and empty lists for the fit
    statistics.  An empty series is created in ds for any output that does
    not exist yet.

    Returns None.  If the series is not found in any control file section an
    error is logged and nothing is changed.
    """
    # locate the control file section that holds this series
    section = pfp_utils.get_cfsection(cf,series=series,mode="quiet")
    if len(section) == 0:
        # series is not in the control file, nothing to do
        msg = "GapFillUsingSOLO: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # make sure the data structure has somewhere to keep the SOLO information
    if not hasattr(ds, "solo"):
        ds.solo = {}
    solo_cf = cf[section][series]["GapFillUsingSOLO"]
    # (key, converter) pairs, in the order used by the "solo_settings" list
    settings_spec = [("nodes_target", int), ("training", int), ("factor", int),
                     ("learningrate", float), ("iterations", int)]
    # names of the statistics collected for plotting later on
    result_keys = ["startdate", "enddate", "No. points", "r",
                   "Bias", "RMSE", "Frac Bias", "NMSE",
                   "Avg (obs)", "Avg (SOLO)",
                   "Var (obs)", "Var (SOLO)", "Var ratio",
                   "m_ols", "b_ols"]
    # each key of the subsection is the name of a SOLO output series
    for output in solo_cf.keys():
        out_cf = solo_cf[output]
        info = {}
        ds.solo[output] = info
        # the target defaults to the series itself
        if "target" in out_cf:
            info["label_tower"] = out_cf["target"]
        else:
            info["label_tower"] = series
        info["site_name"] = ds.globalattributes["site_name"]
        # optional SOLO settings, given as a list in the control file
        if "solo_settings" in out_cf:
            raw_settings = ast.literal_eval(out_cf["solo_settings"])
            settings = {}
            info["solo_settings"] = settings
            for idx, (name, convert) in enumerate(settings_spec):
                settings[name] = convert(raw_settings[idx])
        # drivers used by SOLO for this output
        info["drivers"] = ast.literal_eval(out_cf["drivers"])
        # optional filters, an empty string when not given
        for filter_name in ("turbulence_filter", "daynight_filter"):
            info[filter_name] = pfp_utils.get_keyvaluefromcf(
                cf, [section, series, "GapFillUsingSOLO", output],
                filter_name, default="")
        # one empty list per statistic
        info["results"] = {key: [] for key in result_keys}
        # create a placeholder series if the output does not exist yet
        if output not in ds.series:
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
def gfClimatology_createdict(cf, ds, series):
    """
    Populate ds.climatology with one entry per climatology output configured
    for a series.

    The outputs are the keys of the "GapFillFromClimatology" subsection of the
    control file entry for "series".  For every output the tower label, site
    name, climatology file name, climatology variable name and gap filling
    method are stored in ds.climatology[output].  An empty series is created
    in ds for any output that does not exist yet.

    Returns None.  If the series is not found in any control file section an
    error is logged and nothing is changed.
    """
    # locate the control file section that holds this series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # series is not in the control file, nothing to do
        msg = "GapFillFromClimatology: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # make sure the data structure has somewhere to keep the climatology information
    if not hasattr(ds, "climatology"):
        ds.climatology = {}
    clim_cf = cf[section][series]["GapFillFromClimatology"]
    # each key of the subsection is the name of an output series
    for output in clim_cf.keys():
        out_cf = clim_cf[output]
        info = {}
        ds.climatology[output] = info
        info["label_tower"] = series
        info["site_name"] = ds.globalattributes["site_name"]
        # a "climatology" entry in [Files] (any case) takes precedence over
        # a file name given in the variable section
        files_cf = cf["Files"]
        generic_key = next((key for key in files_cf.keys()
                            if key.lower() == "climatology"), None)
        if generic_key is not None:
            info["file_name"] = files_cf[generic_key]
        else:
            info["file_name"] = out_cf["file_name"]
        # the climatology variable name defaults to the series name
        if "climatology_name" in out_cf:
            info["climatology_name"] = out_cf["climatology_name"]
        else:
            info["climatology_name"] = series
        # the gap filling method defaults to "interpolated_daily"
        if "method" in out_cf.keys():
            info["method"] = out_cf["method"]
        else:
            info["method"] = "interpolated_daily"
        # create a placeholder series if the output does not exist yet
        if output not in ds.series:
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
def gfMergeSeries_createdict(cf,ds,series):
    """
    Register a series in ds.merge so it can be merged later.

    Series named in the prerequisite list below are stored under
    ds.merge["prerequisite"], all others under ds.merge["standard"].  The
    original comments state that the prerequisite series are merged first so
    they can be used with existing tower data to re-calculate Fg, Fn and Fa.
    Each entry holds the output series name (the series itself) and the list
    of sources read from the "Source" key of the "MergeSeries" subsection of
    the control file.  An empty series is created in ds if the output does
    not exist yet.
    """
    # locate the control file section that holds this series
    section = pfp_utils.get_cfsection(cf,series=series,mode="quiet")
    # make sure the data structure has somewhere to keep the merge information
    if not hasattr(ds, "merge"):
        ds.merge = {}
    # decide which merge group this series belongs to
    if series in ("Fsd", "Fsu", "Fld", "Flu", "Ts", "Sws"):
        merge_order = "prerequisite"
    else:
        merge_order = "standard"
    group = ds.merge.setdefault(merge_order, {})
    # create the entry for this series
    entry = {}
    group[series] = entry
    # the merged result is written back to the series itself
    entry["output"] = series
    # list of source series, given as a list in the control file
    entry["source"] = ast.literal_eval(cf[section][series]["MergeSeries"]["Source"])
    # create a placeholder series if the output does not exist yet
    if entry["output"] not in ds.series:
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, entry["output"])
        pfp_utils.CreateSeries(ds, entry["output"], data, flag, attr)
def gfalternate_createdict(cf, ds, series, ds_alt):
    """
    Purpose:
     Creates a dictionary in ds to hold information about the alternate data
     used to gap fill the tower data.  One entry is added to ds.alternate for
     every output listed in the "GapFillFromAlternate" subsection of the
     control file entry for "series".
    Usage:
     gfalternate_createdict(cf, ds, series, ds_alt)
     where cf is the control file object
           ds is the data structure to be gap filled
           series is the label of the series in the control file
           ds_alt is a dictionary, keyed by file name, of the alternate data
                  structures that have already been read
    Side effects:
     Creates ds.alternate if it does not exist and adds an entry per output.
     Reads any alternate data file that is not yet in ds_alt and adds it to
     ds_alt.
     Creates empty series in ds for the output and for series+"_composite"
     when the output does not exist yet.
     Logs an error and returns without changes if the series is not found in
     the control file.
    Author: PRI
    Date: August 2014
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        # pass the series as a logging argument; building "msg" with a
        # trailing ", series" creates a tuple and logs its repr
        logger.error("GapFillFromAlternate: Series %s not found in control file, skipping ...", series)
        return
    # create the alternate directory in the data structure
    if "alternate" not in dir(ds):
        ds.alternate = {}
    alt_cf = cf[section][series]["GapFillFromAlternate"]
    # loop over the outputs listed in the control file
    for output in alt_cf.keys():
        out_cf = alt_cf[output]
        # create the dictionary keys for this output
        info = {}
        ds.alternate[output] = info
        info["label_tower"] = series
        # source name
        info["source"] = out_cf["source"]
        # site name
        info["site_name"] = ds.globalattributes["site_name"]
        # alternate data file name
        # first, look in the [Files] section for a generic file name;
        # list() keeps the keys indexable even if keys() returns a view
        file_list = list(cf["Files"].keys())
        lower_file_list = [item.lower() for item in file_list]
        source_lower = info["source"].lower()
        if source_lower in lower_file_list:
            # found a generic file name
            i = lower_file_list.index(source_lower)
            info["file_name"] = cf["Files"][file_list[i]]
        else:
            # no generic file name found, look for a file name in the variable section
            info["file_name"] = out_cf["file_name"]
        # if the file has not already been read, do it now
        if info["file_name"] not in ds_alt:
            ds_alternate = pfp_io.nc_read_series(info["file_name"], fixtimestepmethod="round")
            gfalternate_matchstartendtimes(ds, ds_alternate)
            ds_alt[info["file_name"]] = ds_alternate
        # get the type of fit, OLS unless a recognised option is given
        info["fit_type"] = "OLS"
        if "fit" in out_cf:
            if out_cf["fit"].lower() in ["ols", "ols_thru0", "mrev", "replace", "rma", "odr"]:
                info["fit_type"] = out_cf["fit"]
            else:
                logger.info("gfAlternate: unrecognised fit option for series %s, used OLS", output)
        # correct for lag?  "yes" is the default; it is also used when the
        # option is not recognised so that the "lag" key is always defined
        info["lag"] = "yes"
        if "lag" in out_cf:
            lag_option = out_cf["lag"].lower()
            if lag_option in ["no", "false"]:
                info["lag"] = "no"
            elif lag_option not in ["yes", "true"]:
                logger.info("gfAlternate: unrecognised lag option for series %s", output)
        # choose specific alternate variable?
        if "usevars" in out_cf:
            info["usevars"] = ast.literal_eval(out_cf["usevars"])
        # alternate data variable name if different from name used in control file
        if "alternate_name" in out_cf:
            info["alternate_name"] = out_cf["alternate_name"]
        else:
            info["alternate_name"] = series
        # results of best fit for plotting later on
        info["results"] = {"startdate": [], "enddate": [], "No. points": [], "No. filled": [],
                           "r": [], "Bias": [], "RMSE": [], "Frac Bias": [], "NMSE": [],
                           "Avg (Tower)": [], "Avg (Alt)": [],
                           "Var (Tower)": [], "Var (Alt)": [], "Var ratio": []}
        # create an empty series in ds if the alternate output series doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = pfp_utils.MakeEmptySeries(ds, output)
            pfp_utils.CreateSeries(ds, output, data, flag, attr)
            pfp_utils.CreateSeries(ds, series+"_composite", data, flag, attr)
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Builds the dictionary of information used to estimate ecosystem
     respiration with the Lloyd-Taylor method and registers the series in
     ds.merge["standard"] so it can be merged later.
    Usage:
     rpLT_info = rpLT_createdict(cf, ds, series)
     Returns the dictionary, or None (after logging an error) when the series
     is not found in the control file or when any driver contains masked
     (missing) data.
    Side effects:
     Creates empty series in ds for the output and for "series" when they do
     not exist yet.  Creates ds.merge and ds.merge["standard"] if needed.
    Author: PRI
    Date: October 2015
    """
    # locate the control file section that holds this series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # series is not in the control file, nothing to do
        logger.error("ERUsingLloydTaylor: Series " + series + " not found in control file, skipping ...")
        return
    lt_cf = cf[section][series]["ERUsingLloydTaylor"]
    drivers = ast.literal_eval(lt_cf["drivers"])
    target = lt_cf["target"]
    # give up on this target if any driver has missing data
    for driver in drivers:
        driver_data, _, _ = pfp_utils.GetSeriesasMA(ds, driver)
        if numpy.ma.count_masked(driver_data) != 0:
            logger.error("ERUsingLloydTaylor: driver " + driver + " contains missing data, skipping target " + target)
            return
    # names of the statistics collected for plotting later on
    result_keys = ["startdate", "enddate", "No. points", "r",
                   "Bias", "RMSE", "Frac Bias", "NMSE",
                   "Avg (obs)", "Avg (LT)",
                   "Var (obs)", "Var (LT)", "Var ratio",
                   "m_ols", "b_ols"]
    # assemble the information dictionary
    rpLT_info = {}
    rpLT_info["site_name"] = ds.globalattributes["site_name"]
    # source series for ER, "Fc" unless given in the control file
    rpLT_info["source"] = pfp_utils.get_keyvaluefromcf(
        cf, [section, series, "ERUsingLloydTaylor"], "source", default="Fc")
    rpLT_info["target"] = target
    rpLT_info["drivers"] = drivers
    # name of the Lloyd-Taylor output series in ds
    rpLT_info["output"] = lt_cf["output"]
    rpLT_info["results"] = {key: [] for key in result_keys}
    # configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # create a placeholder series if the output does not exist yet
    output_label = rpLT_info["output"]
    if output_label not in ds.series:
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, output_label)
        pfp_utils.CreateSeries(ds, output_label, data, flag, attr)
    # register the series in the standard merge group
    if not hasattr(ds, "merge"):
        ds.merge = {}
    standard = ds.merge.setdefault("standard", {})
    entry = {}
    standard[series] = entry
    # the merged result is written back to the series itself
    entry["output"] = series
    # list of source series, given as a list in the control file
    entry["source"] = ast.literal_eval(cf[section][series]["MergeSeries"]["Source"])
    # create a placeholder series if the merge output does not exist yet
    if entry["output"] not in ds.series:
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, entry["output"])
        pfp_utils.CreateSeries(ds, entry["output"], data, flag, attr)
    return rpLT_info