def ImportSeries(cf,ds):
    """
    Purpose:
     Import series from external netCDF files listed in the [Imports]
     section of the control file and insert them into the data structure
     ds at records whose datetimes match.
    Usage:
     ImportSeries(cf,ds)
      where cf is a control file object
            ds is a data structure
    Side effects:
     Creates one new series in ds per entry in the [Imports] section.
     Records with no matching datetime in the import file are left at
     the missing value.
    """
    # check to see if there is an Imports section
    if "Imports" not in cf.keys():
        return
    # number of records
    nRecs = int(ds.globalattributes["nc_nrecs"])
    # get the start and end datetime
    ldt = ds.series["DateTime"]["Data"]
    start_date = ldt[0]
    end_date = ldt[-1]
    # loop over the series in the Imports section
    for label in cf["Imports"].keys():
        import_filename = pfp_utils.get_keyvaluefromcf(cf,["Imports",label],"file_name",default="")
        if import_filename=="":
            msg = " ImportSeries: import filename not found in control file, skipping ..."
            logger.warning(msg)
            continue
        var_name = pfp_utils.get_keyvaluefromcf(cf,["Imports",label],"var_name",default="")
        if var_name=="":
            msg = " ImportSeries: variable name not found in control file, skipping ..."
            logger.warning(msg)
            continue
        # read the netCDF file to be imported
        ds_import = pfp_io.nc_read_series(import_filename)
        ts_import = ds_import.globalattributes["time_step"]
        ldt_import = ds_import.series["DateTime"]["Data"]
        # indices in the import file of the tower start and end datetimes;
        # "exact" match falls back to the full import range via default
        si = pfp_utils.GetDateIndex(ldt_import,str(start_date),ts=ts_import,default=0,match="exact")
        ei = pfp_utils.GetDateIndex(ldt_import,str(end_date),ts=ts_import,default=len(ldt_import)-1,match="exact")
        # start with all records set to the missing value, QC flag 1
        data = numpy.ma.ones(nRecs)*float(c.missing_value)
        flag = numpy.ma.ones(nRecs)
        data_import,flag_import,attr_import = pfp_utils.GetSeriesasMA(ds_import,var_name,si=si,ei=ei)
        # trim the import datetime to the same span as the data just read
        ldt_import = ldt_import[si:ei+1]
        # indices in the tower datetime of each import datetime
        index = pfp_utils.FindIndicesOfBInA(ldt_import,ldt)
        data[index] = data_import
        flag[index] = flag_import
        pfp_utils.CreateSeries(ds,label,data,flag,attr_import)
def rpLT_createdict_info(cf, ds, erlt, called_by):
    """
    Purpose:
     Populate the "info" and "gui" sections of the Lloyd-Taylor ER
     dictionary (erlt) with run metadata: file start/end dates, time step,
     call mode, records-per-day counts and the plot path.
    Usage:
     rpLT_createdict_info(cf, ds, erlt, called_by)
      where cf is a control file object
            ds is a data structure
            erlt is the Lloyd-Taylor info dictionary being built
            called_by is the name of the calling routine
    Side effects:
     Sets ds.returncodes to a non-zero value if the user elects to quit
     when the plot path cannot be created.
    Author: PRI
    Date: Back in the day
          June 2019 - modified for new l5_info structure
    """
    # reset the return message and code
    ds.returncodes["message"] = "OK"
    ds.returncodes["value"] = 0
    # time step
    time_step = int(ds.globalattributes["time_step"])
    # get the level of processing
    level = ds.globalattributes["nc_level"]
    # local pointer to the datetime series
    ldt = ds.series["DateTime"]["Data"]
    # add an info section to the info["solo"] dictionary
    erlt["info"]["file_startdate"] = ldt[0].strftime("%Y-%m-%d %H:%M")
    erlt["info"]["file_enddate"] = ldt[-1].strftime("%Y-%m-%d %H:%M")
    erlt["info"]["startdate"] = ldt[0].strftime("%Y-%m-%d %H:%M")
    erlt["info"]["enddate"] = ldt[-1].strftime("%Y-%m-%d %H:%M")
    erlt["info"]["called_by"] = called_by
    erlt["info"]["time_step"] = time_step
    # check to see if this is a batch or an interactive run
    call_mode = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "call_mode", default="interactive")
    erlt["info"]["call_mode"] = call_mode
    # plots are only shown for interactive runs
    erlt["gui"]["show_plots"] = False
    if call_mode.lower() == "interactive":
        erlt["gui"]["show_plots"] = True
    # truncate to last date in Imports?
    truncate = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "TruncateToImports", default="Yes")
    erlt["info"]["truncate_to_imports"] = truncate
    # number of records per day and maximum lags
    nperhr = int(float(60)/time_step + 0.5)
    erlt["info"]["nperday"] = int(float(24)*nperhr + 0.5)
    erlt["info"]["maxlags"] = int(float(12)*nperhr + 0.5)
    # get the plot path
    plot_path = pfp_utils.get_keyvaluefromcf(cf, ["Files"], "plot_path", default="./plots/")
    plot_path = os.path.join(plot_path, level, "")
    if not os.path.exists(plot_path):
        try:
            os.makedirs(plot_path)
        except OSError:
            # could not create the plot path; let the user choose between
            # quitting (to fix the control file) and using the default path
            msg = "Unable to create the plot path " + plot_path + "\n"
            msg = msg + "Press 'Quit' to edit the control file.\n"
            msg = msg + "Press 'Continue' to use the default path.\n"
            result = pfp_gui.MsgBox_ContinueOrQuit(msg, title="Warning: L6 plot path")
            if result.clickedButton().text() == "Quit":
                # user wants to edit the control file
                msg = " Quitting L6 to edit control file"
                logger.warning(msg)
                ds.returncodes["message"] = msg
                ds.returncodes["value"] = 1
            else:
                # fall back to the default plot path
                plot_path = "./plots/"
                cf["Files"]["plot_path"] = "./plots/"
    erlt["info"]["plot_path"] = plot_path
    return
def GapFillUsingInterpolation(cf, ds):
    """
    Purpose:
     Gap fill variables in the data structure using interpolation.
     All variables in the [Variables], [Drivers] and [Fluxes] section
     are processed.
    Usage:
     pfp_gf.GapFillUsingInterpolation(cf,ds)
     where cf is a control file object
           ds is a data structure
    Author: PRI
    Date: September 2016
    """
    time_step = int(ds.globalattributes["time_step"])
    # variables to be processed come from the control file
    labels = pfp_utils.get_label_list_from_cf(cf)
    # maximum gap length (hours) to fill; 0 disables interpolation
    max_hours = int(pfp_utils.get_keyvaluefromcf(cf, ["Options"], "MaxGapInterpolate", default=3))
    if max_hours == 0:
        logger.info(" Gap fill by interpolation disabled in control file")
        return
    # interpolation scheme to use
    interp_type = str(pfp_utils.get_keyvaluefromcf(cf, ["Options"], "InterpolateType", default="Akima"))
    logger.info(" Using " + interp_type + " interpolation (max. gap = " + str(max_hours) + " hours)")
    # convert the maximum gap length from hours to number of time steps
    max_points = int((max_hours*float(60)/float(time_step))+0.5)
    for label in labels:
        pfp_ts.InterpolateOverMissing(ds, series=label, maxlen=max_points, int_type=interp_type)
def gfSOLO_createdict(cf,ds,series):
    """
    Purpose:
     Creates a dictionary in ds to hold information about the SOLO data
     used to gap fill the tower data.  One entry is created in ds.solo
     for each output listed under [<section>][series][GapFillUsingSOLO]
     in the control file.
    Usage:
     gfSOLO_createdict(cf,ds,series)
      where cf is a control file object
            ds is a data structure
            series is the label of the tower series being gap filled
    Side effects:
     Creates ds.solo if it does not exist and an empty series in ds for
     each SOLO output that does not exist yet.
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf,series=series,mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section)==0:
        msg = "GapFillUsingSOLO: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # create the solo directory in the data structure
    if "solo" not in dir(ds):
        ds.solo = {}
    # name of SOLO output series in ds
    output_list = cf[section][series]["GapFillUsingSOLO"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.solo[output] = {}
        # get the target; defaults to the series label itself
        if "target" in cf[section][series]["GapFillUsingSOLO"][output]:
            ds.solo[output]["label_tower"] = cf[section][series]["GapFillUsingSOLO"][output]["target"]
        else:
            ds.solo[output]["label_tower"] = series
        # site name
        ds.solo[output]["site_name"] = ds.globalattributes["site_name"]
        # list of SOLO settings, order is:
        # [nodes_target, training, factor, learningrate, iterations]
        if "solo_settings" in cf[section][series]["GapFillUsingSOLO"][output]:
            ss_list = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"][output]["solo_settings"])
            ds.solo[output]["solo_settings"] = {}
            ds.solo[output]["solo_settings"]["nodes_target"] = int(ss_list[0])
            ds.solo[output]["solo_settings"]["training"] = int(ss_list[1])
            ds.solo[output]["solo_settings"]["factor"] = int(ss_list[2])
            ds.solo[output]["solo_settings"]["learningrate"] = float(ss_list[3])
            ds.solo[output]["solo_settings"]["iterations"] = int(ss_list[4])
        # list of drivers
        ds.solo[output]["drivers"] = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"][output]["drivers"])
        # apply ustar filter
        opt = pfp_utils.get_keyvaluefromcf(cf,[section,series,"GapFillUsingSOLO",output],
                                           "turbulence_filter",default="")
        ds.solo[output]["turbulence_filter"] = opt
        # apply day/night filter
        opt = pfp_utils.get_keyvaluefromcf(cf,[section,series,"GapFillUsingSOLO",output],
                                           "daynight_filter",default="")
        ds.solo[output]["daynight_filter"] = opt
        # results of best fit for plotting later on
        ds.solo[output]["results"] = {"startdate":[],"enddate":[],"No. points":[],"r":[],
                                      "Bias":[],"RMSE":[],"Frac Bias":[],"NMSE":[],
                                      "Avg (obs)":[],"Avg (SOLO)":[],
                                      "Var (obs)":[],"Var (SOLO)":[],"Var ratio":[],
                                      "m_ols":[],"b_ols":[]}
        # create an empty series in ds if the SOLO output series doesn't exist yet
        if output not in ds.series.keys():
            data,flag,attr = pfp_utils.MakeEmptySeries(ds,output)
            pfp_utils.CreateSeries(ds,output,data,flag,attr)
def rpLL_createdict_outputs(cf, erll, target, called_by, flag_code):
    """
    Purpose:
     Populate erll["outputs"] with one settings dictionary per output
     listed under [EcosystemRespiration][target][called_by] in the
     control file (target/source labels, drivers, window sizes, plot
     options and an empty results dictionary).
    Usage:
     rpLL_createdict_outputs(cf, erll, target, called_by, flag_code)
      where cf is a control file object
            erll is the Lasslop (LL) info dictionary being built
            target is the series being estimated
            called_by is the name of the calling routine
            flag_code is the QC flag value to assign to estimated data
    Author: PRI
    """
    # NOTE: the original read cf["level"] into an unused local; removed
    eo = erll["outputs"]
    # loop over the outputs listed in the control file
    section = "EcosystemRespiration"
    outputs = cf[section][target][called_by].keys()
    for output in outputs:
        # create the dictionary keys for this series
        eo[output] = {}
        # get the target
        sl = [section, target, called_by, output]
        eo[output]["target"] = pfp_utils.get_keyvaluefromcf(cf, sl, "target", default=target)
        eo[output]["source"] = pfp_utils.get_keyvaluefromcf(cf, sl, "source", default="Fc")
        # add the flag_code
        eo[output]["flag_code"] = flag_code
        # list of drivers
        opt = pfp_utils.get_keyvaluefromcf(cf, sl, "drivers", default="Ta")
        eo[output]["drivers"] = pfp_cfg.cfg_string_to_list(opt)
        # step and window sizes (days) for the moving-window fits
        opt = pfp_utils.get_keyvaluefromcf(cf, sl, "step_size_days", default=5)
        eo[output]["step_size_days"] = int(opt)
        opt = pfp_utils.get_keyvaluefromcf(cf, sl, "window_size_days", default=15)
        eo[output]["window_size_days"] = int(opt)
        # whether to produce plots for each fit
        opt = pfp_utils.get_keyvaluefromcf(cf, sl, "output_plots", default="False")
        eo[output]["output_plots"] = (opt == "True")
        # incoming shortwave threshold used to separate day and night
        opt = pfp_utils.get_keyvaluefromcf(cf, sl, "fsd_threshold", default=10)
        eo[output]["fsd_threshold"] = int(opt)
        # fit statistics for plotting later on
        eo[output]["results"] = {"startdate":[],"enddate":[],"No. points":[],"r":[],
                                 "Bias":[],"RMSE":[],"Frac Bias":[],"NMSE":[],
                                 "Avg (obs)":[],"Avg (LL)":[],
                                 "Var (obs)":[],"Var (LL)":[],"Var ratio":[],
                                 "m_ols":[],"b_ols":[]}
    return
def l1qc(cf):
    """
    Purpose:
     L1 processing: read the input file (Excel or CSV) named in the
     control file, build and repair the datetime series, apply user
     defined functions and return the L1 data structure.
    Usage:
     ds1 = l1qc(cf)
      where cf is a control file object
    Returns:
     ds1, a data structure; ds1.returncodes["value"] is non-zero on error.
    """
    # get the data series from the Excel file
    in_filename = pfp_io.get_infilenamefromcf(cf)
    if not pfp_utils.file_exists(in_filename, mode="quiet"):
        msg = " Input file " + in_filename + " not found ..."
        logger.error(msg)
        ds1 = pfp_io.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    # choose the reader based on the file extension
    file_name, file_extension = os.path.splitext(in_filename)
    if "csv" in file_extension.lower():
        ds1 = pfp_io.csv_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Excel datetime from the Python datetime objects
        #pfp_utils.get_xldatefromdatetime(ds1)
    else:
        ds1 = pfp_io.xl_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Python datetime objects from the Excel datetime
        #pfp_utils.get_datetime_from_xldate(ds1)
    # get the netCDF attributes from the control file
    #pfp_ts.do_attributes(cf,ds1)
    pfp_utils.get_datetime(cf, ds1)
    # round the Python datetime to the nearest second
    pfp_utils.round_datetime(ds1, mode="nearest_second")
    # check for gaps in the Python datetime series and fix if present
    # BUG FIX: the section name was lowercase ["options"]; the control file
    # section is "Options" everywhere else in this file, so the key was
    # never found and the default was always used
    fixtimestepmethod = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "FixTimeStepMethod", default="round")
    if pfp_utils.CheckTimeStep(ds1):
        pfp_utils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    #pfp_utils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    #pfp_utils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    pfp_ts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    pfp_ts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    #pfp_ts.get_synthetic_fsd(ds1)
    # check missing data and QC flags are consistent
    pfp_utils.CheckQCFlags(ds1)
    return ds1
def l4qc(main_gui, cf, ds3):
    """
    Purpose:
     L4 processing: gap fill the meteorological drivers using
     interpolation, climatology and alternate data sources, then merge
     the gap filled series and re-calculate the derived quantities.
    Usage:
     ds4 = l4qc(main_gui, cf, ds3)
      where main_gui is the main GUI object (passed through to the
            alternate-data gap filler)
            cf is a control file object
            ds3 is the L3 data structure
    Returns:
     ds4, the L4 data structure; empty (logical false) on copy error.
    """
    ds4 = pfp_io.copy_datastructure(cf, ds3)
    # ds4 will be empty (logical false) if an error occurs in copy_datastructure
    # return from this routine if this is the case
    if not ds4:
        return ds4
    # set some attributes for this level
    pfp_utils.UpdateGlobalAttributes(cf, ds4, "L4")
    # check to see if we have any imports
    pfp_gf.ImportSeries(cf, ds4)
    # re-apply the quality control checks (range, diurnal and rules)
    pfp_ck.do_qcchecks(cf, ds4)
    # now do the meteorological driver gap filling
    # parse the control file for information on how the user wants to do the gap filling
    l4_info = pfp_gf.ParseL4ControlFile(cf, ds4)
    if ds4.returncodes["value"] != 0:
        return ds4
    # *** start of the section that does the gap filling of the drivers ***
    # read the alternate data files
    ds_alt = pfp_gf.ReadAlternateFiles(ds4, l4_info)
    # fill short gaps using interpolation
    pfp_gf.GapFillUsingInterpolation(cf, ds4)
    # gap fill using climatology
    if "GapFillFromClimatology" in l4_info:
        pfp_gf.GapFillFromClimatology(ds4, l4_info, "GapFillFromClimatology")
    # do the gap filling using the ACCESS output
    if "GapFillFromAlternate" in l4_info:
        pfp_gfALT.GapFillFromAlternate(main_gui, ds4, ds_alt, l4_info, "GapFillFromAlternate")
        if ds4.returncodes["value"] != 0:
            return ds4
    # merge the first group of gap filled drivers into a single series
    pfp_ts.MergeSeriesUsingDict(ds4, l4_info, merge_order="prerequisite")
    # re-calculate the ground heat flux but only if requested in control file
    opt = pfp_utils.get_keyvaluefromcf(cf,["Options"], "CorrectFgForStorage", default="No", mode="quiet")
    if opt.lower() != "no":
        pfp_ts.CorrectFgForStorage(cf, ds4, Fg_out='Fg', Fg_in='Fg_Av', Ts_in='Ts', Sws_in='Sws')
    # re-calculate the net radiation
    pfp_ts.CalculateNetRadiation(cf, ds4, Fn_out='Fn', Fsd_in='Fsd', Fsu_in='Fsu', Fld_in='Fld', Flu_in='Flu')
    # re-calculate the available energy
    pfp_ts.CalculateAvailableEnergy(ds4, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
    # merge the second group of gap filled drivers into a single series
    pfp_ts.MergeSeriesUsingDict(ds4, l4_info, merge_order="standard")
    # re-calculate the water vapour concentrations
    pfp_ts.CalculateHumiditiesAfterGapFill(ds4, l4_info)
    # re-calculate the meteorological variables
    pfp_ts.CalculateMeteorologicalVariables(ds4, l4_info)
    # check for any missing data
    pfp_utils.get_missingingapfilledseries(ds4, l4_info)
    # write the percentage of good data as a variable attribute
    pfp_utils.get_coverage_individual(ds4)
    # write the percentage of good data for groups
    pfp_utils.get_coverage_groups(ds4)
    # remove intermediate series from the data structure
    pfp_ts.RemoveIntermediateSeries(ds4, l4_info)
    return ds4
def do_concatenate_batch(cf_level):
    """
    Purpose:
     Batch concatenation: loop over the control files listed in cf_level,
     concatenate the netCDF files named in each and then plot the
     fingerprints for the concatenated output (unless disabled).
    Usage:
     do_concatenate_batch(cf_level)
      where cf_level is a dictionary mapping keys to control file paths
    Side effects:
     Switches the logger output file to "concatenate"; writes the
     concatenated netCDF files and fingerprint plots to disk.
    """
    logger = pfp_log.change_logger_filename("pfp_log", "concatenate")
    for i in cf_level.keys():
        # skip entries whose control file does not exist
        if not os.path.isfile(cf_level[i]):
            msg = " Control file " + cf_level[i] + " not found"
            logger.error(msg)
            continue
        cf_file_name = os.path.split(cf_level[i])
        msg = "Starting concatenation with " + cf_file_name[1]
        logger.info(msg)
        # batch boundary: catch everything so one bad control file does
        # not stop the rest of the batch
        try:
            cf_cc = pfp_io.get_controlfilecontents(cf_level[i])
            info = pfp_compliance.ParseConcatenateControlFile(cf_cc)
            if not info["NetCDFConcatenate"]["OK"]:
                msg = " Error occurred parsing the control file " + cf_file_name[1]
                logger.error(msg)
                continue
            pfp_io.NetCDFConcatenate(info)
            msg = "Finished concatenation with " + cf_file_name[1]
            logger.info(msg)
            # now plot the fingerprints for the concatenated files
            opt = pfp_utils.get_keyvaluefromcf(cf_cc, ["Options"], "DoFingerprints", default="yes")
            if opt.lower() == "no":
                continue
            # build a fingerprint control file from the standard template
            cf_fp = pfp_io.get_controlfilecontents("controlfiles/standard/fingerprint.txt")
            if "Files" not in dir(cf_fp):
                cf_fp["Files"] = {}
            file_name = cf_cc["Files"]["Out"]["ncFileName"]
            file_path = ntpath.split(file_name)[0] + "/"
            cf_fp["Files"]["file_path"] = file_path
            cf_fp["Files"]["in_filename"] = ntpath.split(file_name)[1]
            # use the plot path from the concatenate control file if given,
            # otherwise derive it from the data path
            if "plot_path" in cf_cc["Files"]:
                cf_fp["Files"]["plot_path"] = cf_cc["Files"]["plot_path"]
            else:
                cf_fp["Files"]["plot_path"] = file_path[:file_path.index("Data")] + "Plots/"
            if "Options" not in cf_fp:
                cf_fp["Options"] = {}
            # fingerprints are run in batch mode with plots saved, not shown
            cf_fp["Options"]["call_mode"] = "batch"
            cf_fp["Options"]["show_plots"] = "No"
            msg = "Doing fingerprint plots using " + cf_fp["Files"]["in_filename"]
            logger.info(msg)
            pfp_plot.plot_fingerprint(cf_fp)
            msg = "Finished fingerprint plots"
            logger.info(msg)
            logger.info("")
        except Exception:
            msg = "Error occurred during concatenation with " + cf_file_name[1]
            logger.error(msg)
            error_message = traceback.format_exc()
            logger.error(error_message)
            continue
    return
def do_IRGAcheck(cf, ds):
    """
    Purpose:
     Decide which IRGA check routine to use depending on the setting of
     the "irga_type" key in the [Options] section of the control file.
     The default is Li7500.
    Usage:
     do_IRGAcheck(cf, ds)
      where cf is a control file object
            ds is a data structure
    Author: PRI
    Date: September 2015
    """
    irga_list = ["li7500", "li7500a", "li7500rs", "ec150", "ec155", "irgason"]
    # get the IRGA type from the control file
    irga_type = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "irga_type", default="li7500")
    # remove any hyphens or spaces
    for item in ["-", " "]:
        if item in irga_type:
            irga_type = irga_type.replace(item, "")
    # check the IRGA type against the list of supported devices
    if irga_type.lower() not in irga_list:
        msg = " Unrecognised IRGA type " + irga_type + " given in control file, IRGA checks skipped ..."
        logger.error(msg)
        return
    # do the IRGA checks
    if irga_type.lower() == "li7500":
        ds.globalattributes["irga_type"] = irga_type
        do_li7500check(cf, ds)
    elif irga_type.lower() in ["li7500a", "irgason"]:
        ds.globalattributes["irga_type"] = irga_type
        do_li7500acheck(cf, ds)
    elif irga_type.lower() in ["ec155", "ec150"]:
        # "irgason" removed from this list: it is caught by the li7500a
        # branch above, so it was unreachable here
        ds.globalattributes["irga_type"] = irga_type
        do_EC155check(cf, ds)
    else:
        # NOTE(review): "li7500rs" passes the recognised-device check above
        # but has no branch here, so it falls through to this message —
        # confirm which check routine it should use
        msg = " Unsupported IRGA type " + irga_type + ", contact the developer ..."
        logger.error(msg)
    return
def l4qc(cf, ds3):
    """
    Purpose:
     L4 processing (older variant): gap fill the meteorological drivers
     using interpolation, climatology, alternate data and SOLO, then
     merge and re-calculate derived quantities.
    Usage:
     ds4 = l4qc(cf, ds3)
      where cf is a control file object
            ds3 is the L3 data structure
    Returns:
     ds4, the L4 data structure; empty (logical false) on copy error.
    NOTE(review): another function named l4qc (taking main_gui as its
    first argument) exists in this file; at import time the later
    definition wins — confirm which one is intended to be live.
    """
    # !!! code here to use existing L4 file
    # logic
    # if the L4 doesn't exist
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is False
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is True
    #  - read the contents of the L4 netCDF file
    #  - check the start and end dates of the L3 and L4 data
    #    - if these are the same then tell the user there is nothing to do
    #  - copy the L3 data to the L4 data structure
    #  - replace the L3 data with the L4 data
    #ds4 = copy.deepcopy(ds3)
    ds4 = pfp_io.copy_datastructure(cf, ds3)
    # ds4 will be empty (logical false) if an error occurs in copy_datastructure
    # return from this routine if this is the case
    if not ds4:
        return ds4
    # set some attributes for this level
    pfp_utils.UpdateGlobalAttributes(cf, ds4, "L4")
    ds4.cf = cf
    ## calculate the available energy
    #if "Fa" not in ds4.series.keys():
    #    pfp_ts.CalculateAvailableEnergy(ds4,Fa_out='Fa',Fn_in='Fn',Fg_in='Fg')
    # create a dictionary to hold the gap filling data
    ds_alt = {}
    # check to see if we have any imports
    pfp_gf.ImportSeries(cf, ds4)
    # re-apply the quality control checks (range, diurnal and rules)
    pfp_ck.do_qcchecks(cf, ds4)
    # now do the meteorological driver gap filling
    for ThisOne in cf["Drivers"].keys():
        if ThisOne not in ds4.series.keys():
            logger.warning("Series " + ThisOne + " not in data structure")
            continue
        # parse the control file for information on how the user wants to do the gap filling
        pfp_gf.GapFillParseControlFile(cf, ds4, ThisOne, ds_alt)
    # *** start of the section that does the gap filling of the drivers ***
    # fill short gaps using interpolation
    pfp_gf.GapFillUsingInterpolation(cf, ds4)
    # gap fill using climatology
    pfp_gf.GapFillFromClimatology(ds4)
    # do the gap filling using the ACCESS output
    pfp_gfALT.GapFillFromAlternate(cf, ds4, ds_alt)
    if ds4.returncodes["alternate"] == "quit":
        return ds4
    # gap fill using SOLO
    # NOTE(review): ds3 is passed here alongside ds4 — confirm this is
    # intended rather than ds4 only
    pfp_gfSOLO.GapFillUsingSOLO(cf, ds3, ds4)
    if ds4.returncodes["solo"] == "quit":
        return ds4
    # merge the first group of gap filled drivers into a single series
    pfp_ts.MergeSeriesUsingDict(ds4, merge_order="prerequisite")
    # re-calculate the ground heat flux but only if requested in control file
    opt = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "CorrectFgForStorage", default="No", mode="quiet")
    if opt.lower() != "no":
        pfp_ts.CorrectFgForStorage(cf, ds4, Fg_out='Fg', Fg_in='Fg_Av', Ts_in='Ts', Sws_in='Sws')
    # re-calculate the net radiation
    pfp_ts.CalculateNetRadiation(cf, ds4, Fn_out='Fn', Fsd_in='Fsd', Fsu_in='Fsu', Fld_in='Fld', Flu_in='Flu')
    # re-calculate the available energy
    pfp_ts.CalculateAvailableEnergy(ds4, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
    # merge the second group of gap filled drivers into a single series
    pfp_ts.MergeSeriesUsingDict(ds4, merge_order="standard")
    # re-calculate the water vapour concentrations
    pfp_ts.CalculateHumiditiesAfterGapFill(ds4)
    # re-calculate the meteorological variables
    pfp_ts.CalculateMeteorologicalVariables(ds4)
    # the Tumba rhumba
    pfp_ts.CalculateComponentsFromWsWd(ds4)
    # check for any missing data
    pfp_utils.get_missingingapfilledseries(ds4)
    # write the percentage of good data as a variable attribute
    pfp_utils.get_coverage_individual(ds4)
    # write the percentage of good data for groups
    pfp_utils.get_coverage_groups(ds4)
    return ds4
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
     Mask a series wherever any of its precursor ("Source") series has a
     QC flag indicating bad data, and set the QC flag of the masked
     records to "code".
    Usage:
     do_dependencycheck(cf, ds, section, series, code=23, mode="quiet")
      where cf is a control file object
            ds is a data structure
            section is the control file section (looked up if empty)
            series is the label of the series to check
            code is the QC flag value for records rejected by this check
            mode is "verbose" to log each check
    Author: PRI
    Date: Back in the day
    """
    if len(section) == 0 and len(series) == 0:
        return
    if len(section) == 0:
        section = pfp_utils.get_cfsection(cf, series=series, mode='quiet')
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series " + series + ", skipping ..."
        logger.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for " + series
        logger.info(msg)
    # get the precursor source list from the control file
    source_list = ast.literal_eval(cf[section][series]["DependencyCheck"]["Source"])
    # check to see if the "ignore_missing" flag is set
    opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                       "ignore_missing", default="no")
    ignore_missing = opt.lower() in ["yes", "y", "true", "t"]
    # get the data
    dependent_data, dependent_flag, dependent_attr = pfp_utils.GetSeries(ds, series)
    # loop over the precursor source list
    for item in source_list:
        # check the precursor is in the data structure
        if item not in ds.series.keys():
            msg = " DependencyCheck: " + series + " precursor series " + item + " not found, skipping ..."
            logger.warning(msg)
            continue
        # get the precursor data
        precursor_data, precursor_flag, precursor_attr = pfp_utils.GetSeries(ds, item)
        # check if the user wants to ignore missing precursor data
        if ignore_missing:
            # find records where the precursor data is the missing value;
            # compare against the scalar directly instead of building an
            # array of missing values first
            idx = numpy.where(numpy.isclose(precursor_data, float(c.missing_value)))[0]
            # and set these flags to 0 so missing data is ignored
            precursor_flag[idx] = numpy.int32(0)
        # records where the precursor flag shows the data is not OK
        # (flags ending in 0 are OK); computed once and reused
        precursor_bad = numpy.mod(precursor_flag, 10) != 0
        # mask the dependent data at those records
        dependent_data = numpy.ma.masked_where(precursor_bad, dependent_data)
        # and set the dependent QC flag at those records
        idx = numpy.where(precursor_bad)[0]
        dependent_flag[idx] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    pfp_utils.CreateSeries(ds, series, dependent_data, dependent_flag, dependent_attr)
    # our work here is done
    return
def ApplyTurbulenceFilter_checks(cf, ds):
    """
    Purpose:
     Parse and sanity check the turbulence filter options in the [Options]
     section of the control file.
    Usage:
     opt = ApplyTurbulenceFilter_checks(cf, ds)
      where cf is a control file object
            ds is a data structure
    Returns:
     opt, a dictionary of filter options; opt["OK"] is False if the
     filter is disabled or the options are invalid.
    """
    opt = {"OK": True, "turbulence_filter": "ustar", "filter_list": ['Fc']}
    # return if there is no Options section in control file
    if "Options" not in cf:
        msg = " ApplyTurbulenceFilter: Options section not found in control file"
        logger.warning(msg)
        opt["OK"] = False
        return opt
    # get the value of the TurbulenceFilter key in the Options section
    opt["turbulence_filter"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "TurbulenceFilter", default="None")
    # return if turbulence filter disabled
    if opt["turbulence_filter"].lower() == "none":
        msg = " Turbulence filter disabled in control file at " + ds.globalattributes["nc_level"]
        logger.info(msg)
        opt["OK"] = False
        return opt
    # check to see if filter type can be handled
    if opt["turbulence_filter"].lower() not in ["ustar", "ustar_evg", "l"]:
        msg = " Unrecognised turbulence filter option ("
        msg = msg + opt["turbulence_filter"] + "), no filter applied"
        logger.error(msg)
        opt["OK"] = False
        return opt
    # get the list of series to be filtered
    if "FilterList" in cf["Options"]:
        opt["filter_list"] = ast.literal_eval(cf["Options"]["FilterList"])
    # check to see if the series are in the data structure
    # BUG FIX: iterate over a copy of the list; the original removed items
    # from the same list it was iterating over, which skips the element
    # immediately following each removed one
    for item in list(opt["filter_list"]):
        if item not in ds.series.keys():
            msg = " Series " + item + " given in FilterList not found in data stucture"
            logger.warning(msg)
            opt["filter_list"].remove(item)
    # return if the filter list is empty
    if len(opt["filter_list"]) == 0:
        msg = " FilterList in control file is empty, skipping turbulence filter"
        logger.warning(msg)
        opt["OK"] = False
        return opt
    # get the value of the DayNightFilter key in the Options section
    opt["daynight_filter"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "DayNightFilter", default="None")
    # check to see if filter type can be handled
    if opt["daynight_filter"].lower() not in ["fsd", "sa", "none"]:
        msg = " Unrecognised day/night filter option ("
        msg = msg + opt["daynight_filter"] + "), no filter applied"
        logger.error(msg)
        opt["OK"] = False
        return opt
    # check to see if all day time values are to be accepted
    opt["accept_day_times"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "AcceptDayTimes", default="Yes")
    opt["use_evening_filter"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "UseEveningFilter", default="Yes")
    return opt
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Creates a dictionary in ds to hold information about estimating ecosystem
     respiration using the Lloyd-Taylor method.
    Usage:
     rpLT_info = rpLT_createdict(cf, ds, series)
      where cf is a control file object
            ds is a data structure
            series is the label of the ER series being estimated
    Returns:
     rpLT_info, a dictionary of Lloyd-Taylor settings and empty result
     lists; None (implicitly) if the series is missing from the control
     file or a driver contains missing data.
    Side effects:
     Creates empty output series in ds and the ds.merge["standard"]
     entry for this series.
    Author: PRI
    Date October 2015
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error("ERUsingLloydTaylor: Series " + series + " not found in control file, skipping ...")
        return
    # check that none of the drivers have missing data
    driver_list = ast.literal_eval(cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    target = cf[section][series]["ERUsingLloydTaylor"]["target"]
    for label in driver_list:
        data, flag, attr = pfp_utils.GetSeriesasMA(ds, label)
        if numpy.ma.count_masked(data) != 0:
            logger.error("ERUsingLloydTaylor: driver " + label + " contains missing data, skipping target " + target)
            return
    # create the dictionary keys for this series
    rpLT_info = {}
    # site name
    rpLT_info["site_name"] = ds.globalattributes["site_name"]
    # source series for ER
    opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "ERUsingLloydTaylor"], "source", default="Fc")
    rpLT_info["source"] = opt
    # target series name
    rpLT_info["target"] = cf[section][series]["ERUsingLloydTaylor"]["target"]
    # list of drivers
    rpLT_info["drivers"] = ast.literal_eval(cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    # name of SOLO output series in ds
    rpLT_info["output"] = cf[section][series]["ERUsingLloydTaylor"]["output"]
    # results of best fit for plotting later on
    rpLT_info["results"] = {"startdate": [], "enddate": [], "No. points": [], "r": [],
                            "Bias": [], "RMSE": [], "Frac Bias": [], "NMSE": [],
                            "Avg (obs)": [], "Avg (LT)": [],
                            "Var (obs)": [], "Var (LT)": [], "Var ratio": [],
                            "m_ols": [], "b_ols": []}
    # create the configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # create an empty series in ds if the output series doesn't exist yet
    if rpLT_info["output"] not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, rpLT_info["output"])
        pfp_utils.CreateSeries(ds, rpLT_info["output"], data, flag, attr)
    # create the merge directory in the data structure
    if "merge" not in dir(ds):
        ds.merge = {}
    if "standard" not in ds.merge.keys():
        ds.merge["standard"] = {}
    # create the dictionary keys for this series
    ds.merge["standard"][series] = {}
    # output series name
    ds.merge["standard"][series]["output"] = series
    # source
    ds.merge["standard"][series]["source"] = ast.literal_eval(cf[section][series]["MergeSeries"]["Source"])
    # create an empty series in ds if the output series doesn't exist yet
    if ds.merge["standard"][series]["output"] not in ds.series.keys():
        data, flag, attr = pfp_utils.MakeEmptySeries(ds, ds.merge["standard"][series]["output"])
        pfp_utils.CreateSeries(ds, ds.merge["standard"][series]["output"], data, flag, attr)
    return rpLT_info
def gfMDS_createdict(cf, ds, series):
    """
    Purpose:
     Create an information dictionary for MDS gap filling from the contents
     of the control file.
    Usage:
     info["MDS"] = gfMDS_createdict(cf)
    Author: PRI
    Date: May 2018
    """
    # get the section of the control file containing the series
    section = pfp_utils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section)==0:
        msg = "GapFillUsingMDS: "+series+" not found in control file, skipping ..."
        logger.error(msg)
        return
    # create the MDS attribute (a dictionary) in ds, this will hold all MDS settings
    if "mds" not in dir(ds):
        ds.mds = {}
    # name of MDS output series in ds
    output_list = cf[section][series]["GapFillUsingMDS"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.mds[output] = {}
        # get the target; defaults to the series label itself
        if "target" in cf[section][series]["GapFillUsingMDS"][output]:
            ds.mds[output]["target"] = cf[section][series]["GapFillUsingMDS"][output]["target"]
        else:
            ds.mds[output]["target"] = series
        # site name
        ds.mds[output]["site_name"] = ds.globalattributes["site_name"]
        # list of MDS settings
        # NOTE(review): mdss_list is parsed but never used — confirm intent
        if "mds_settings" in cf[section][series]["GapFillUsingMDS"][output]:
            mdss_list = ast.literal_eval(cf[section][series]["GapFillUsingMDS"][output]["mds_settings"])
        # list of drivers
        ds.mds[output]["drivers"] = ast.literal_eval(cf[section][series]["GapFillUsingMDS"][output]["drivers"])
        # list of tolerances
        ds.mds[output]["tolerances"] = ast.literal_eval(cf[section][series]["GapFillUsingMDS"][output]["tolerances"])
        # get the ustar filter option
        opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "GapFillUsingMDS", output], "turbulence_filter", default="")
        ds.mds[output]["turbulence_filter"] = opt
        # get the day/night filter option
        opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "GapFillUsingMDS", output], "daynight_filter", default="")
        ds.mds[output]["daynight_filter"] = opt
    # check that all requested targets and drivers have a mapping to
    # a FluxNet label, remove if they don't
    fluxnet_label_map = {"Fc":"NEE", "Fe":"LE", "Fh":"H",
                         "Fsd":"SW_IN", "Ta":"TA", "VPD":"VPD"}
    # BUG FIX: iterate over a snapshot of the keys; deleting entries from
    # ds.mds while iterating over the dict raises RuntimeError in Python 3
    for mds_label in list(ds.mds.keys()):
        ds.mds[mds_label]["mds_label"] = mds_label
        pfp_target = ds.mds[mds_label]["target"]
        if pfp_target not in fluxnet_label_map:
            msg = " Target ("+pfp_target+") not supported for MDS gap filling"
            logger.warning(msg)
            del ds.mds[mds_label]
        else:
            ds.mds[mds_label]["target_mds"] = fluxnet_label_map[pfp_target]
            pfp_drivers = ds.mds[mds_label]["drivers"]
            # BUG FIX: iterate over a copy of the drivers list because
            # unsupported drivers are removed from it inside the loop
            for pfp_driver in list(pfp_drivers):
                if pfp_driver not in fluxnet_label_map:
                    msg = "Driver ("+pfp_driver+") not supported for MDS gap filling"
                    logger.warning(msg)
                    ds.mds[mds_label]["drivers"].remove(pfp_driver)
                else:
                    if "drivers_mds" not in ds.mds[mds_label]:
                        ds.mds[mds_label]["drivers_mds"] = []
                    ds.mds[mds_label]["drivers_mds"].append(fluxnet_label_map[pfp_driver])
            # drop the output entirely if no supported drivers remain
            if len(ds.mds[mds_label]["drivers"]) == 0:
                del ds.mds[mds_label]
    return
elif level.lower() == "concatenate": # concatenate netCDF files for i in cf_batch["Levels"][level].keys(): cfname = cf_batch["Levels"][level][i] if not os.path.isfile(cfname): msg = " Control file " + cfname + " not found" logger.error(msg) continue cf_file_name = os.path.split(cfname) logger.info('Starting concatenation with ' + cf_file_name[1]) cf_cc = pfp_io.get_controlfilecontents(cfname) pfp_io.nc_concatenate(cf_cc) logger.info('Finished concatenation with ' + cf_file_name[1]) # now plot the fingerprints for the concatenated files opt = pfp_utils.get_keyvaluefromcf(cf_cc, ["Options"], "DoFingerprints", default="yes") if opt.lower() == "no": continue cf_fp = pfp_io.get_controlfilecontents( "controlfiles/standard/fingerprint.txt") if "Files" not in dir(cf_fp): cf_fp["Files"] = {} file_name = cf_cc["Files"]["Out"]["ncFileName"] file_path = ntpath.split(file_name)[0] + "/" cf_fp["Files"]["file_path"] = file_path cf_fp["Files"]["in_filename"] = ntpath.split(file_name)[1] cf_fp["Files"]["plot_path"] = file_path[:file_path. index("Data")] + "Plots/" if "Options" not in cf_fp: cf_fp["Options"] = {} cf_fp["Options"]["call_mode"] = "batch" cf_fp["Options"]["show_plots"] = "no" logger.info('Doing fingerprint plots using ' +
def CPD_run(cf):
    """
    Purpose:
     Prepare the inputs for the change point detection (CPD) u* threshold
     estimation.  Reads the netCDF file named in the control file, builds
     a pandas data frame of the requested variables and a dictionary of
     processing options.
    Usage:
     df, d = CPD_run(cf)
     where cf is a control file object
           df is a data frame of the input variables indexed by datetime
           d is a dictionary of CPD processing options
    Side effects:
     Creates the plot and results directories if they do not already exist.
    Author: PRI
    """
    # set input file and output path and create directories for plots and results
    path_out = cf['Files']['file_path']
    file_in = os.path.join(cf['Files']['file_path'], cf['Files']['in_filename'])
    if "out_filename" in cf['Files']:
        file_out = os.path.join(cf['Files']['file_path'], cf['Files']['out_filename'])
    else:
        file_out = os.path.join(cf['Files']['file_path'],
                                cf['Files']['in_filename'].replace(".nc", "_CPD.xls"))
    plot_path = "plots/"
    if "plot_path" in cf["Files"]:
        plot_path = os.path.join(cf["Files"]["plot_path"], "CPD/")
    if not os.path.isdir(plot_path):
        os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path):
        os.makedirs(results_path)
    # get a dictionary mapping the CPD variable names to the netCDF names
    names = {}
    for item in cf["Variables"].keys():
        if "AltVarName" in cf["Variables"][item].keys():
            names[item] = cf["Variables"][item]["AltVarName"]
        else:
            names[item] = item
    # read the netCDF file
    logger.info(' Reading netCDF file '+file_in)
    ds = pfp_io.nc_read_series(file_in)
    ts = int(ds.globalattributes["time_step"])
    # get the datetime
    dt = ds.series["DateTime"]["Data"]
    # adjust the datetime so that the last time period in a year is correctly
    # assigned, e.g. the last period for 2013 is 2014-01-01 00:00, here we
    # make the year 2013
    dt = dt - datetime.timedelta(minutes=ts)
    # now get the data, replacing the missing value with NaN as we go
    d = {}
    f = {}
    for item in names.keys():
        data, flag, attr = pfp_utils.GetSeries(ds, names[item])
        d[item] = np.where(data == c.missing_value, np.nan, data)
        f[item] = flag
    # set all data to NaN at any time where any flag is not 0 or 10
    # NOTE(review): the original loop tested flag != 0 regardless of f_OK, so
    # data with the acceptable flag value 10 was also discarded, contradicting
    # the stated intent; the test below implements "not 0 and not 10"
    for item in f.keys():
        idx = np.where((f[item] != 0) & (f[item] != 10))[0]
        if len(idx) != 0:
            for item_d in d.keys():
                d[item_d][idx] = np.nan
    d["Year"] = np.array([ldt.year for ldt in dt])
    df = pd.DataFrame(d, index=dt)
    # replace any remaining missing values with NaN
    # NOTE(review): DataFrame.replace() returns a new data frame, it is not
    # in-place; the original code discarded the result so the replacement
    # never took effect
    df = df.replace(c.missing_value, np.nan)
    # build the dictionary of additional configs
    d = {}
    d['radiation_threshold'] = int(cf['Options']['Fsd_threshold'])
    d['num_bootstraps'] = int(cf['Options']['Num_bootstraps'])
    d['flux_period'] = int(ds.globalattributes["time_step"])
    d['site_name'] = ds.globalattributes["site_name"]
    d["call_mode"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "call_mode",
                                                  default="interactive", mode="quiet")
    d["show_plots"] = pfp_utils.get_optionskeyaslogical(cf, "show_plots", default=True)
    d['plot_tclass'] = False
    if cf['Options']['Plot_TClass'] == 'True':
        d['plot_tclass'] = True
    if cf['Options']['Output_plots'] == 'True':
        d['plot_path'] = plot_path
    if cf['Options']['Output_results'] == 'True':
        d['results_path'] = results_path
    d["file_out"] = file_out
    return df, d
def get_configs_dict(cf, ds):
    """
    Purpose:
     Build the configuration dictionary used by the Lloyd-Taylor ER gap
     filling routine from the [ER][ER_LT][ERUsingLloydTaylor] section of
     the control file and the global attributes of the data structure.
    Usage:
     configs_dict = get_configs_dict(cf, ds)
     where cf is a control file object
           ds is a data structure
    Author: PRI
    """
    # all Lloyd-Taylor options live under the same control file section
    section = ["ER", "ER_LT", "ERUsingLloydTaylor"]
    configs_dict = {}
    configs_dict["nan_value"] = int(c.missing_value)
    # integer options: (control file key, configs_dict key, default)
    int_options = [("minimum_temperature_spread", "minimum_temperature_spread", 5),
                   ("step_size_days", "step_size_days", 5),
                   ("window_size_days", "window_size_days", 15),
                   ("minimum_percent_annual", "minimum_pct_annual", 30),
                   ("minimum_percent_noct_window", "minimum_pct_noct_window", 20)]
    for cf_key, cfg_key, default in int_options:
        opt = pfp_utils.get_keyvaluefromcf(cf, section, cf_key, default=default)
        configs_dict[cfg_key] = int(opt)
    # boolean options are stored as the strings "True"/"False" in the control file
    opt = pfp_utils.get_keyvaluefromcf(cf, section, "output_plots", default="False")
    configs_dict["output_plots"] = (opt == "True")
    opt = pfp_utils.get_keyvaluefromcf(cf, section, "show_plots", default="False")
    configs_dict["show_plots"] = (opt == "True")
    # target series label
    opt = pfp_utils.get_keyvaluefromcf(cf, section, "target", default="ER")
    configs_dict["target"] = str(opt)
    # "drivers" is the string representation of a list; Lloyd-Taylor uses a
    # single driver so only the first entry is kept
    opt = pfp_utils.get_keyvaluefromcf(cf, section, "drivers", default="['Ta']")
    configs_dict["drivers"] = ast.literal_eval(opt)[0]
    # output series label
    opt = pfp_utils.get_keyvaluefromcf(cf, section, "output", default="ER_LT_all")
    configs_dict["output_label"] = opt
    configs_dict["output_results"] = True
    # measurement interval in hours, from the time step in minutes
    ts = int(ds.globalattributes["time_step"])
    configs_dict["measurement_interval"] = float(ts) / 60.0
    configs_dict["QC_accept_code"] = 0
    # plots go in a "respiration" subdirectory of the plot path
    opt = pfp_utils.get_keyvaluefromcf(cf, ["Files"], "plot_path", default="plots/")
    configs_dict["output_path"] = os.path.join(opt, "respiration/")
    return configs_dict