def GapFillUsingInterpolation(cf, ds):
    """
    Purpose:
     Gap fill variables in the data structure using interpolation.
     All variables in the [Variables], [Drivers] and [Fluxes] section
     are processed.
    Usage:
     qcgf.GapFillUsingInterpolation(cf,ds)
     where cf is a control file object
           ds is a data structure
    Author: PRI
    Date: September 2016
    """
    label_list = qcutils.get_label_list_from_cf(cf)
    # global maximum gap length (in time steps) to interpolate over
    maxlen = int(qcutils.get_keyvaluefromcf(cf, ["Options"], "MaxGapInterpolate", default=2))
    if maxlen == 0:
        msg = " Gap fill by interpolation disabled in control file"
        logger.info(msg)
        return
    for label in label_list:
        section = qcutils.get_cfsection(cf, series=label)
        # use a per-label copy of the maximum gap length so a per-variable
        # override does not leak into subsequent variables (the original
        # code overwrote maxlen itself)
        label_maxlen = maxlen
        if "MaxGapInterpolate" in cf[section][label]:
            label_maxlen = int(qcutils.get_keyvaluefromcf(cf, [section, label], "MaxGapInterpolate", default=2))
            if label_maxlen == 0:
                msg = " Gap fill by interpolation disabled for " + label
                logger.info(msg)
                continue
        # BUG FIX: pass the configured maximum gap length; the original code
        # hard-coded maxlen=2 here, silently ignoring MaxGapInterpolate
        qcts.InterpolateOverMissing(ds, series=label, maxlen=label_maxlen)
def ApplyTurbulenceFilter_checks(cf,ds):
    """
    Purpose:
     Check the turbulence filter settings in the [Options] section of the
     control file and assemble a dictionary of the options to be used.
    Usage:
     opt = ApplyTurbulenceFilter_checks(cf,ds)
     where cf is a control file object and ds is a data structure
    Returns:
     opt, a dictionary; opt["OK"] is False if the filter should not be applied
    """
    opt = {"OK":True,"turbulence_filter":"ustar","filter_list":['Fc']}
    # return if there is no Options section in control file
    if "Options" not in cf:
        msg = " ApplyTurbulenceFilter: Options section not found in control file"
        log.warning(msg)
        opt["OK"] = False
        return opt
    # get the value of the TurbulenceFilter key in the Options section
    opt["turbulence_filter"] = qcutils.get_keyvaluefromcf(cf,["Options"],"TurbulenceFilter",default="None")
    # return if turbulence filter disabled
    if opt["turbulence_filter"].lower()=="none":
        msg = " Turbulence filter disabled in control file at "+ds.globalattributes["nc_level"]
        log.info(msg)
        opt["OK"] = False
        return opt
    # check to see if filter type can be handled
    if opt["turbulence_filter"].lower() not in ["ustar","ustar_evg","l"]:
        msg = " Unrecognised turbulence filter option ("
        msg = msg+opt["turbulence_filter"]+"), no filter applied"
        log.error(msg)
        opt["OK"] = False
        return opt
    # get the list of series to be filtered
    if "FilterList" in cf["Options"]:
        opt["filter_list"] = ast.literal_eval(cf["Options"]["FilterList"])
    # check to see if the series are in the data structure
    # BUG FIX: iterate over a copy of the list; the original removed items
    # from the list while iterating over it, which skips the element after
    # each removal
    for item in list(opt["filter_list"]):
        if item not in ds.series.keys():
            msg = " Series "+item+" given in FilterList not found in data stucture"
            log.warning(msg)
            opt["filter_list"].remove(item)
    # return if the filter list is empty
    if len(opt["filter_list"])==0:
        msg = " FilterList in control file is empty, skipping turbulence filter"
        log.warning(msg)
        opt["OK"] = False
        return opt
    # get the value of the DayNightFilter key in the Options section
    opt["daynight_filter"] = qcutils.get_keyvaluefromcf(cf,["Options"],"DayNightFilter",default="None")
    # check to see if filter type can be handled
    if opt["daynight_filter"].lower() not in ["fsd","sa","none"]:
        msg = " Unrecognised day/night filter option ("
        msg = msg+opt["daynight_filter"]+"), no filter applied"
        log.error(msg)
        opt["OK"] = False
        return opt
    # check to see if all day time values are to be accepted
    opt["accept_day_times"] = qcutils.get_keyvaluefromcf(cf,["Options"],"AcceptDayTimes",default="Yes")
    opt["use_evening_filter"] = qcutils.get_keyvaluefromcf(cf,["Options"],"UseEveningFilter",default="Yes")
    return opt
def get_configs_dict(cf,ds):
    """
    Build the configuration dictionary used by the Lloyd-Taylor
    respiration code from the [ER][ER_LT][ERUsingLloydTaylor] section of
    the control file plus the time step from the data structure ds.
    Returns the configuration dictionary.
    """
    lt_section = ["ER","ER_LT","ERUsingLloydTaylor"]
    def _lt_option(key, default):
        # fetch one key from the Lloyd-Taylor section of the control file
        return qcutils.get_keyvaluefromcf(cf, lt_section, key, default=default)
    configs_dict = {"nan_value": int(c.missing_value)}
    # integer-valued options: (configs_dict key, control file key, default)
    for cfg_key, cf_key, default in (
            ("minimum_temperature_spread", "minimum_temperature_spread", 5),
            ("step_size_days", "step_size_days", 5),
            ("window_size_days", "window_size_days", 15),
            ("minimum_pct_annual", "minimum_percent_annual", 30),
            ("minimum_pct_noct_window", "minimum_percent_noct_window", 20)):
        configs_dict[cfg_key] = int(_lt_option(cf_key, default))
    configs_dict["output_plots"] = (_lt_option("output_plots", "False") == "True")
    configs_dict["target"] = str(_lt_option("target", "ER"))
    # NOTE: only the first driver in the list is used
    configs_dict["drivers"] = ast.literal_eval(_lt_option("drivers", "['Ta']"))[0]
    configs_dict["output_label"] = _lt_option("output", "ER_LT_all")
    configs_dict["output_results"] = True
    # measurement interval in hours, from the time step in minutes
    ts = int(ds.globalattributes["time_step"])
    configs_dict["measurement_interval"] = float(ts)/60.0
    configs_dict["QC_accept_code"] = 0
    plot_path = qcutils.get_keyvaluefromcf(cf, ["Files"], "plot_path", default="plots/")
    configs_dict["output_path"] = os.path.join(plot_path, "respiration/")
    return configs_dict
def l1qc_process(cf, ds1):
    """
    Purpose:
     Apply the common L1 processing steps to an existing data structure:
     variable attributes, datetime rounding and repair, date-derived
     series, variance/standard-deviation conversion and user functions.
     NOTE(review): this duplicates the second half of l1qc() in this file
     — presumably factored out to be shared by several readers; confirm.
    Usage:
     l1qc_process(cf, ds1)
     where cf is a control file object and ds1 is an L1 data structure
    """
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf, ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1, mode="nearest_second")
    #check for gaps in the Python datetime series and fix if present
    # NOTE(review): section name "options" is lower case here but "Options"
    # elsewhere in this file — confirm get_keyvaluefromcf is case-insensitive
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf, ["options"], "FixTimeStepMethod", default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    qcutils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
def do_IRGAcheck(cf,ds):
    """
    Purpose:
     Decide which IRGA check routine to use depending on the setting of
     the "irga_type" key in the [Options] section of the control file.
     The default is Li7500.
    Usage:
     do_IRGAcheck(cf,ds)
    Author: PRI
    Date: September 2015
    """
    irga_list = ["li7500","li7500a","li7500rs","ec150","ec155","irgason"]
    # get the IRGA type from the control file
    irga_type = qcutils.get_keyvaluefromcf(cf,["Options"],"irga_type", default="li7500")
    # remove any hyphens or spaces
    for item in ["-"," "]:
        if item in irga_type:
            irga_type = irga_type.replace(item,"")
    # check the IRGA type against the list of suppprted devices
    if irga_type.lower() not in irga_list:
        msg = " Unrecognised IRGA type "+irga_type+" given in control file, IRGA checks skipped ..."
        log.error(msg)
        return
    # do the IRGA checks
    if irga_type.lower()=="li7500":
        ds.globalattributes["irga_type"] = irga_type
        do_li7500check(cf,ds)
    elif irga_type.lower() in ["li7500a","irgason"]:
        ds.globalattributes["irga_type"] = irga_type
        do_li7500acheck(cf,ds)
    elif irga_type.lower() in ["ec155","ec150"]:
        # BUG FIX: "irgason" removed from this list; it also appeared in the
        # branch above, which always matched first, so listing it here was
        # dead code.  Behaviour is unchanged: irgason uses do_li7500acheck.
        ds.globalattributes["irga_type"] = irga_type
        do_EC155check(cf,ds)
    else:
        # NOTE(review): "li7500rs" passes the supported-device check above
        # but has no branch here, so it falls through to this error message
        # — confirm whether it should be routed to do_li7500acheck
        msg = " Unsupported IRGA type "+irga_type+", contact the devloper ..."
        log.error(msg)
    return
def ImportSeries(cf, ds):
    """
    Purpose:
     Import series from external netCDF files listed in the [Imports]
     section of the control file and insert them into the data structure
     ds, aligned on the tower datetime series.  Periods not covered by
     the import file are left at the missing value.
    Usage:
     ImportSeries(cf, ds)
     where cf is a control file object and ds is a data structure
    """
    # check to see if there is an Imports section
    if "Imports" not in cf.keys():
        return
    # number of records
    nRecs = int(ds.globalattributes["nc_nrecs"])
    # get the start and end datetime
    ldt = ds.series["DateTime"]["Data"]
    start_date = ldt[0]
    end_date = ldt[-1]
    # loop over the series in the Imports section
    for label in cf["Imports"].keys():
        import_filename = qcutils.get_keyvaluefromcf(cf, ["Imports", label], "file_name", default="")
        if import_filename == "":
            msg = " ImportSeries: import filename not found in control file, skipping ..."
            logger.warning(msg)
            continue
        var_name = qcutils.get_keyvaluefromcf(cf, ["Imports", label], "var_name", default="")
        if var_name == "":
            msg = " ImportSeries: variable name not found in control file, skipping ..."
            logger.warning(msg)
            continue
        # read the import file and find the indices of the overlap period
        ds_import = qcio.nc_read_series(import_filename)
        ts_import = ds_import.globalattributes["time_step"]
        ldt_import = ds_import.series["DateTime"]["Data"]
        si = qcutils.GetDateIndex(ldt_import, str(start_date), ts=ts_import, default=0, match="exact")
        ei = qcutils.GetDateIndex(ldt_import, str(end_date), ts=ts_import, default=len(ldt_import) - 1, match="exact")
        # start with all data set to the missing value and the QC flag to 1
        data = numpy.ma.ones(nRecs) * float(c.missing_value)
        flag = numpy.ma.ones(nRecs)
        data_import, flag_import, attr_import = qcutils.GetSeriesasMA(ds_import, var_name, si=si, ei=ei)
        ldt_import = ldt_import[si:ei + 1]
        # indices of the import datetimes in the tower datetime series;
        # only those positions are overwritten with imported data
        index = qcutils.FindIndicesOfBInA(ldt_import, ldt)
        data[index] = data_import
        flag[index] = flag_import
        qcutils.CreateSeries(ds, label, data, flag, attr_import)
def l1qc(cf):
    """
    Purpose:
     Read the L1 input file (Excel or CSV) named in the control file and
     return an L1 data structure with attributes applied, datetimes
     checked and repaired, date-derived series added, user functions
     applied and QC flags checked for consistency.
    Usage:
     ds1 = l1qc(cf)
     where cf is a control file object
    Returns:
     ds1, a data structure; ds1.returncodes["value"] is non-zero on failure
    """
    # get the data series from the Excel file
    in_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(in_filename, mode="quiet"):
        msg = " Input file " + in_filename + " not found ..."
        logger.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    # choose the reader based on the file extension
    file_name, file_extension = os.path.splitext(in_filename)
    if "csv" in file_extension.lower():
        ds1 = qcio.csv_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Excel datetime from the Python datetime objects
        qcutils.get_xldatefromdatetime(ds1)
    else:
        ds1 = qcio.xl_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Python datetime objects from the Excel datetime
        qcutils.get_datetimefromxldate(ds1)
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf, ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1, mode="nearest_second")
    #check for gaps in the Python datetime series and fix if present
    # NOTE(review): section name "options" is lower case here but "Options"
    # elsewhere in this file — confirm get_keyvaluefromcf is case-insensitive
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf, ["options"], "FixTimeStepMethod", default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    qcutils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
    # check missing data and QC flags are consistent
    qcutils.CheckQCFlags(ds1)
    return ds1
def l1qc(cf):
    """
    Purpose:
     Read the L1 input file (Excel or CSV) named in the control file and
     return an L1 data structure.
     NOTE(review): this is a second definition of l1qc in this file; it
     uses "log" instead of "logger" and omits the final CheckQCFlags call
     present in the other copy — it looks like an older duplicate, and at
     import time whichever definition comes later wins.  Confirm which
     version is current and remove the other.
    Usage:
     ds1 = l1qc(cf)
     where cf is a control file object
    Returns:
     ds1, a data structure; ds1.returncodes["value"] is non-zero on failure
    """
    # get the data series from the Excel file
    in_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(in_filename,mode="quiet"):
        msg = " Input file "+in_filename+" not found ..."
        log.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value":1,"message":msg}
        return ds1
    # choose the reader based on the file extension
    file_name,file_extension = os.path.splitext(in_filename)
    if "csv" in file_extension.lower():
        ds1 = qcio.csv_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Excel datetime from the Python datetime objects
        qcutils.get_xldatefromdatetime(ds1)
    else:
        ds1 = qcio.xl_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Python datetime objects from the Excel datetime
        qcutils.get_datetimefromxldate(ds1)
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf,ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1,mode="nearest_second")
    #check for gaps in the Python datetime series and fix if present
    # NOTE(review): section name "options" is lower case here but "Options"
    # elsewhere in this file — confirm get_keyvaluefromcf is case-insensitive
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf,["options"],"FixTimeStepMethod",default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1,fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    qcutils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf,ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf,ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
    return ds1
# number of records ds_60minutes.globalattributes["nc_nrecs"] = nRecs # processing level ds_60minutes.globalattributes["nc_level"] = "L1" # latitude and longitude, chose central pixel of 3x3 grid ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1] ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1] # put the ACCESS data into the 60 minute data structure ds_60minutes # make a QC flag with a value of 0 flag_60minutes = numpy.zeros(nRecs) # loop over the variables defined in the control file for item in ["valid_date","valid_time","lat","lon"]: if item in var_list: var_list.remove(item) for var in var_list: # get the name of the ACCESS variable access_name = qcutils.get_keyvaluefromcf(cf,["Variables",var],"access_name",default=var) if access_name not in f.variables.keys(): logging.error("Requested variable "+access_name+" not found in ACCESS data") continue attr = {} for this_attr in f.varattr[access_name].keys(): attr[this_attr] = f.varattr[access_name][this_attr] attr["missing_value"] = c.missing_value # loop over all ACCESS grids and give them standard OzFlux names with the grid idices appended for i in range(0,3): for j in range(0,3): if len(f.variables[access_name].shape)==3: var_ij = var+'_'+str(i)+str(j) series = f.variables[access_name][:,i,j] qcutils.CreateSeries(ds_60minutes,var_ij,series,Flag=flag_60minutes,Attr=attr) elif len(f.variables[access_name].shape)==4:
def get_accessdata(cf,ds_60minutes,f,info):
    """
    Purpose:
     Copy ACCESS model output from the netCDF data object f into the 60
     minute data structure ds_60minutes: builds local and UTC datetime
     series (dropping records with duplicate timestamps), then copies the
     3x3 grid of each requested variable as <label>_ij series.
    Usage:
     get_accessdata(cf,ds_60minutes,f,info)
     where cf is a control file object, f is the ACCESS data object and
     info["site_tz"] is the site's pytz timezone
    """
    # latitude and longitude, chose central pixel of 3x3 grid
    ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
    ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
    # list of variables to process
    var_list = list(cf["Variables"].keys())
    # get a series of Python datetimes and put this into the data structure
    valid_date = f.variables["valid_date"][:]
    nRecs = len(valid_date)
    valid_time = f.variables["valid_time"][:]
    dl = [datetime.datetime.strptime(str(int(valid_date[i])*10000+int(valid_time[i])),"%Y%m%d%H%M") for i in range(0,nRecs)]
    dt_utc_all = numpy.array(dl)
    # time difference between consecutive records; zero means a duplicate
    time_step = numpy.array([(dt_utc_all[i]-dt_utc_all[i-1]).total_seconds() for i in range(1,len(dt_utc_all))])
    time_step = numpy.append(time_step,3600)
    idxne0 = numpy.where(time_step!=0)[0]
    idxeq0 = numpy.where(time_step==0)[0]
    idx_clipped = numpy.where((idxeq0>0)&(idxeq0<nRecs))[0]
    idxeq0 = idxeq0[idx_clipped]
    # keep only records with non-duplicate timestamps
    dt_utc = dt_utc_all[idxne0]
    dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_utc]
    # local standard time: convert to site timezone and remove any DST offset
    dt_loc = [x.astimezone(info["site_tz"]) for x in dt_utc]
    dt_loc = [x-x.dst() for x in dt_loc]
    dt_loc = [x.replace(tzinfo=None) for x in dt_loc]
    flag = numpy.zeros(len(dt_loc),dtype=numpy.int32)
    ds_60minutes.series["DateTime"] = {}
    ds_60minutes.series["DateTime"]["Data"] = dt_loc
    ds_60minutes.series["DateTime"]["Flag"] = flag
    ds_60minutes.series["DateTime_UTC"] = {}
    ds_60minutes.series["DateTime_UTC"]["Data"] = dt_utc
    ds_60minutes.series["DateTime_UTC"]["Flag"] = flag
    nRecs = len(ds_60minutes.series["DateTime"]["Data"])
    ds_60minutes.globalattributes["nc_nrecs"] = nRecs
    # we're done with valid_date and valid_time, drop them from the variable list
    for item in ["valid_date","valid_time","lat","lon"]:
        if item in var_list:
            var_list.remove(item)
    # create the QC flag with all zeros
    nRecs = ds_60minutes.globalattributes["nc_nrecs"]
    flag_60minutes = numpy.zeros(nRecs,dtype=numpy.int32)
    # get the UTC hour
    hr_utc = [x.hour for x in dt_utc]
    attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    qcutils.CreateSeries(ds_60minutes,'Hr_UTC',hr_utc,Flag=flag_60minutes,Attr=attr)
    # now loop over the variables listed in the control file
    for label in var_list:
        # get the name of the variable in the ACCESS file
        access_name = qcutils.get_keyvaluefromcf(cf,["Variables",label],"access_name",default=label)
        # warn the user if the variable not found
        if access_name not in list(f.variables.keys()):
            msg = "Requested variable "+access_name
            msg = msg+" not found in ACCESS data"
            logging.error(msg)
            continue
        # get the variable attibutes
        attr = get_variableattributes(f,access_name)
        # loop over the 3x3 matrix of ACCESS grid data supplied
        for i in range(0,3):
            for j in range(0,3):
                label_ij = label+'_'+str(i)+str(j)
                if len(f.variables[access_name].shape)==3:
                    series = f.variables[access_name][:,i,j]
                elif len(f.variables[access_name].shape)==4:
                    series = f.variables[access_name][:,0,i,j]
                else:
                    msg = "Unrecognised variable ("+label
                    msg = msg+") dimension in ACCESS file"
                    logging.error(msg)
                    # BUG FIX: skip this grid cell; the original fell
                    # through after logging the error and used a stale (or
                    # undefined) "series" from a previous iteration
                    continue
                # drop the duplicate-timestamp records found above
                series = series[idxne0]
                qcutils.CreateSeries(ds_60minutes,label_ij,series,
                                     Flag=flag_60minutes,Attr=attr)
    return
def access_read_mfiles2(file_list,var_list=None):
    """
    Purpose:
     Read a list of single-timestep ACCESS netCDF files and concatenate
     the requested variables (plus lat/lon/valid_date/valid_time) into a
     single ACCESSData object.  Files with unexpected dimensions are
     skipped; variables missing from a file are padded with a dummy
     series so all variables stay the same length.
    Usage:
     f = access_read_mfiles2(file_list,var_list=['Fsd','Ta'])
    Returns:
     f, an ACCESSData object
    """
    # BUG FIX: the default was a mutable list (var_list=[]) which was
    # mutated below (lat/lon/valid_date/valid_time appended) and therefore
    # grew across calls; use None as the default and work on a copy
    var_list = [] if var_list is None else list(var_list)
    f = ACCESSData()
    # check that we have a list of files to process
    if len(file_list)==0:
        print("access_read_mfiles: empty file_list received, returning ...")
        return f
    # make sure latitude and longitude are read
    if "lat" not in var_list:
        var_list.append("lat")
    if "lon" not in var_list:
        var_list.append("lon")
    # make sure valid_date and valid_time are read
    if "valid_date" not in var_list:
        var_list.append("valid_date")
    if "valid_time" not in var_list:
        var_list.append("valid_time")
    for file_name in file_list:
        # open the netCDF file
        ncfile = netCDF4.Dataset(file_name)
        # check the number of records
        dims = ncfile.dimensions
        shape = (len(dims["time"]),len(dims["lat"]),len(dims["lon"]))
        # move to the next file if this file doesn't have exactly 1 time record
        if shape[0]!=1:
            print("access_read_mfiles: length of time dimension in "+file_name+" is "+str(shape[0])+" (expected 1)")
            continue
        # move to the next file if this file doesn't have 3 latitude records
        if shape[1]!=3:
            print("access_read_mfiles: length of lat dimension in "+file_name+" is "+str(shape[1])+" (expected 3)")
            continue
        # move to the next file if this file doesn't have 3 longitude records
        if shape[2]!=3:
            print("access_read_mfiles: length of lon dimension in "+file_name+" is "+str(shape[2])+" (expected 3)")
            continue
        # seems OK to continue with this file ...
        # add the file name to the file_list in the global attributes
        f.globalattr["file_list"].append(file_name)
        # get the global attributes
        for gattr in ncfile.ncattrs():
            if gattr not in f.globalattr:
                f.globalattr[gattr] = getattr(ncfile,gattr)
        # if no variable list was passed to this routine, use all variables
        if len(var_list)==0:
            var_list = list(ncfile.variables.keys())
        # load the data into the data structure
        for var in var_list:
            # get the name of the variable in the ACCESS file
            # NOTE(review): cf is not a parameter of this function, it is
            # read from module scope — confirm it is defined when this
            # routine is called
            access_name = qcutils.get_keyvaluefromcf(cf,["Variables",var],"access_name",default=var)
            # check that the requested variable exists in the ACCESS file
            if access_name in list(ncfile.variables.keys()):
                # append to the series if it is already in the data structure
                if access_name not in list(f.variables.keys()):
                    f.variables[access_name] = ncfile.variables[access_name][:]
                else:
                    f.variables[access_name] = numpy.concatenate((f.variables[access_name],ncfile.variables[access_name][:]),axis=0)
                # now copy the variable attribiutes
                # create the variable attribute dictionary
                if access_name not in f.varattr:
                    f.varattr[access_name] = {}
                # loop over the variable attributes
                for this_attr in ncfile.variables[access_name].ncattrs():
                    # add the variable attribute if it's not there already
                    if this_attr not in list(f.varattr[access_name].keys()):
                        f.varattr[access_name][this_attr] = getattr(ncfile.variables[access_name],this_attr)
            else:
                # pad with a dummy series so all variables stay aligned
                print("access_read_mfiles: ACCESS variable "+access_name+" not found in "+file_name)
                if access_name not in list(f.variables.keys()):
                    f.variables[access_name] = makedummyseries(shape)
                else:
                    f.variables[access_name] = numpy.concatenate((f.variables[access_name],makedummyseries(shape)),axis=0)
        # close the netCDF file
        ncfile.close()
    # return with the data structure
    return f
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Creates a dictionary in ds to hold information about estimating
     ecosystem respiration using the Lloyd-Taylor method.
    Usage:
     rpLT_info = rpLT_createdict(cf, ds, series)
    Returns:
     rpLT_info, a dictionary of Lloyd-Taylor settings, or None if the
     series is not in the control file or a driver has missing data
    Author: PRI
    Date: October 2015
    """
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error("ERUsingLloydTaylor: Series " + series + " not found in control file, skipping ...")
        return
    # check that none of the drivers have missing data
    driver_list = ast.literal_eval(cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    target = cf[section][series]["ERUsingLloydTaylor"]["target"]
    for label in driver_list:
        data, flag, attr = qcutils.GetSeriesasMA(ds, label)
        if numpy.ma.count_masked(data) != 0:
            logger.error("ERUsingLloydTaylor: driver " + label + " contains missing data, skipping target " + target)
            return
    # create the dictionary keys for this series
    rpLT_info = {}
    # site name
    rpLT_info["site_name"] = ds.globalattributes["site_name"]
    # source series for ER
    opt = qcutils.get_keyvaluefromcf(cf, [section, series, "ERUsingLloydTaylor"], "source", default="Fc")
    rpLT_info["source"] = opt
    # target series name
    rpLT_info["target"] = cf[section][series]["ERUsingLloydTaylor"]["target"]
    # list of drivers
    rpLT_info["drivers"] = ast.literal_eval(cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    # name of SOLO output series in ds
    rpLT_info["output"] = cf[section][series]["ERUsingLloydTaylor"]["output"]
    # results of best fit for plotting later on
    rpLT_info["results"] = {"startdate": [], "enddate": [], "No. points": [],
                            "r": [], "Bias": [], "RMSE": [], "Frac Bias": [],
                            "NMSE": [], "Avg (obs)": [], "Avg (LT)": [],
                            "Var (obs)": [], "Var (LT)": [], "Var ratio": [],
                            "m_ols": [], "b_ols": []}
    # create the configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # create an empty series in ds if the output series doesn't exist yet
    if rpLT_info["output"] not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, rpLT_info["output"])
        qcutils.CreateSeries(ds, rpLT_info["output"], data, flag, attr)
    # create the merge directory in the data structure
    if "merge" not in dir(ds):
        ds.merge = {}
    if "standard" not in ds.merge.keys():
        ds.merge["standard"] = {}
    # create the dictionary keys for this series
    ds.merge["standard"][series] = {}
    # output series name
    ds.merge["standard"][series]["output"] = series
    # source
    # NOTE(review): assumes a [MergeSeries] subsection with a "Source" key
    # exists for this series — a missing key will raise KeyError; confirm
    ds.merge["standard"][series]["source"] = ast.literal_eval(cf[section][series]["MergeSeries"]["Source"])
    # create an empty series in ds if the output series doesn't exist yet
    if ds.merge["standard"][series]["output"] not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, ds.merge["standard"][series]["output"])
        qcutils.CreateSeries(ds, ds.merge["standard"][series]["output"], data, flag, attr)
    return rpLT_info
def get_configs_dict(cf, ds):
    """
    Build the configuration dictionary used by the Lloyd-Taylor
    respiration code from the [ER][ER_LT][ERUsingLloydTaylor] section of
    the control file plus the time step from the data structure ds.
    Returns the configuration dictionary.
    """
    lt_section = ["ER", "ER_LT", "ERUsingLloydTaylor"]
    def _lt_option(key, default):
        # fetch one key from the Lloyd-Taylor section of the control file
        return qcutils.get_keyvaluefromcf(cf, lt_section, key, default=default)
    configs_dict = {"nan_value": int(c.missing_value)}
    # integer-valued options: (configs_dict key, control file key, default)
    for cfg_key, cf_key, default in (
            ("minimum_temperature_spread", "minimum_temperature_spread", 5),
            ("step_size_days", "step_size_days", 5),
            ("window_size_days", "window_size_days", 15),
            ("minimum_pct_annual", "minimum_percent_annual", 30),
            ("minimum_pct_noct_window", "minimum_percent_noct_window", 20)):
        configs_dict[cfg_key] = int(_lt_option(cf_key, default))
    # boolean options are stored as the strings "True"/"False" in the control file
    configs_dict["output_plots"] = (_lt_option("output_plots", "False") == "True")
    configs_dict["show_plots"] = (_lt_option("show_plots", "False") == "True")
    configs_dict["target"] = str(_lt_option("target", "ER"))
    # NOTE: only the first driver in the list is used
    configs_dict["drivers"] = ast.literal_eval(_lt_option("drivers", "['Ta']"))[0]
    configs_dict["output_label"] = _lt_option("output", "ER_LT_all")
    configs_dict["output_results"] = True
    # measurement interval in hours, from the time step in minutes
    ts = int(ds.globalattributes["time_step"])
    configs_dict["measurement_interval"] = float(ts) / 60.0
    configs_dict["QC_accept_code"] = 0
    plot_path = qcutils.get_keyvaluefromcf(cf, ["Files"], "plot_path", default="plots/")
    configs_dict["output_path"] = os.path.join(plot_path, "respiration/")
    return configs_dict
def gfSOLO_createdict(cf, ds, series):
    """
    Creates a dictionary in ds to hold information about the SOLO data
    used to gap fill the tower data.

    Reads the [GapFillUsingSOLO] subsection for the given series and, for
    each output listed there, records the target series, drivers, SOLO
    network settings, filter options and an empty results dictionary in
    ds.solo; an empty output series is created in ds if needed.
    """
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error("GapFillUsingSOLO: Series %s not found in control file, skipping ...", series)
        return
    # create the solo directory in the data structure
    if "solo" not in dir(ds):
        ds.solo = {}
    # name of SOLO output series in ds
    output_list = cf[section][series]["GapFillUsingSOLO"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.solo[output] = {}
        # get the target; default to the series itself if not given
        if "target" in cf[section][series]["GapFillUsingSOLO"][output]:
            ds.solo[output]["label_tower"] = cf[section][series]["GapFillUsingSOLO"][output]["target"]
        else:
            ds.solo[output]["label_tower"] = series
        # site name
        ds.solo[output]["site_name"] = ds.globalattributes["site_name"]
        # list of SOLO settings: [nodes, training, factor, learning rate, iterations]
        if "solo_settings" in cf[section][series]["GapFillUsingSOLO"][output]:
            ss_list = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"][output]["solo_settings"])
            ds.solo[output]["solo_settings"] = {}
            ds.solo[output]["solo_settings"]["nodes_target"] = int(ss_list[0])
            ds.solo[output]["solo_settings"]["training"] = int(ss_list[1])
            ds.solo[output]["solo_settings"]["factor"] = int(ss_list[2])
            ds.solo[output]["solo_settings"]["learningrate"] = float(ss_list[3])
            ds.solo[output]["solo_settings"]["iterations"] = int(ss_list[4])
        # list of drivers
        ds.solo[output]["drivers"] = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"][output]["drivers"])
        # apply ustar filter
        opt = qcutils.get_keyvaluefromcf(cf, [section, series, "GapFillUsingSOLO", output], "turbulence_filter", default="")
        ds.solo[output]["turbulence_filter"] = opt
        # day/night filter option
        opt = qcutils.get_keyvaluefromcf(cf, [section, series, "GapFillUsingSOLO", output], "daynight_filter", default="")
        ds.solo[output]["daynight_filter"] = opt
        # results of best fit for plotting later on
        ds.solo[output]["results"] = {"startdate": [], "enddate": [], "No. points": [],
                                      "r": [], "Bias": [], "RMSE": [], "Frac Bias": [],
                                      "NMSE": [], "Avg (obs)": [], "Avg (SOLO)": [],
                                      "Var (obs)": [], "Var (SOLO)": [], "Var ratio": [],
                                      "m_ols": [], "b_ols": []}
        # create an empty series in ds if the SOLO output series doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = qcutils.MakeEmptySeries(ds, output)
            qcutils.CreateSeries(ds, output, data, flag, attr)
def CPD_run(cf):
    """
    Purpose:
     Read the input netCDF file named in the control file and assemble
     the pandas DataFrame and configuration dictionary used by the
     change point detection (CPD) u* threshold code.
    Usage:
     df,d = CPD_run(cf)
     where cf is a control file object
    Returns:
     df, a DataFrame of the data variables indexed by datetime
     d, a dictionary of CPD configuration settings
    """
    # set input file and output path and create directories for plots and results
    file_in = os.path.join(cf['Files']['file_path'],cf['Files']['in_filename'])
    path_out = cf['Files']['file_path']
    file_out = os.path.join(cf['Files']['file_path'],cf['Files']['in_filename'].replace(".nc","_CPD.xls"))
    plot_path = "plots/"
    if "plot_path" in cf["Files"]:
        plot_path = os.path.join(cf["Files"]["plot_path"],"CPD/")
    if not os.path.isdir(plot_path):
        os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path):
        os.makedirs(results_path)
    # get user-set variable names from the control file; AltVarName
    # overrides the section name when present
    vars_data = []
    for item in cf["Variables"].keys():
        if "AltVarName" in cf["Variables"][item].keys():
            vars_data.append(str(cf["Variables"][item]["AltVarName"]))
        else:
            vars_data.append(str(item))
    # matching QC flag variable names
    vars_QC = [item+"_QCFlag" for item in vars_data]
    vars_all = vars_data+vars_QC
    # read the netCDF file
    log.info(' Reading netCDF file '+file_in)
    ncFile = netCDF4.Dataset(file_in)
    flux_period = int(ncFile.time_step)
    dates_list = [dt.datetime(*xlrd.xldate_as_tuple(elem,0)) for elem in ncFile.variables['xlDateTime']]
    d = {}
    for item in vars_all:
        nDims = len(ncFile.variables[item].shape)
        if nDims not in [1,3]:
            msg = "CPD_run: unrecognised number of dimensions ("+str(nDims)
            msg = msg+") for netCDF variable "+item
            raise Exception(msg)
        if nDims==1:
            # single dimension
            d[item] = ncFile.variables[item][:]
        elif nDims==3:
            # 3 dimensions: take the single (0,0) grid cell
            d[item] = ncFile.variables[item][:,0,0]
    df = pd.DataFrame(d,index=dates_list)
    # build dictionary of additional configs
    d = {}
    d['radiation_threshold'] = int(cf['Options']['Fsd_threshold'])
    d['num_bootstraps'] = int(cf['Options']['Num_bootstraps'])
    d['flux_period'] = flux_period
    d['site_name'] = getattr(ncFile,"site_name")
    d["call_mode"] = qcutils.get_keyvaluefromcf(cf,["Options"],"call_mode",default="interactive",mode="quiet")
    d["show_plots"] = qcutils.get_keyvaluefromcf(cf,["Options"],"show_plots",default=True,mode="quiet")
    if cf['Options']['Output_plots']=='True':
        d['plot_path'] = plot_path
    if cf['Options']['Output_results']=='True':
        d['results_path'] = results_path
    d["file_out"] = file_out
    # BUG FIX: the original called df.replace() but discarded the result
    # (replace is not in-place), so the missing-value code was never
    # converted to NaN
    df = df.replace(c.missing_value,np.nan)
    # mask data whose QC flag is not one of the accepted codes (0 or 10);
    # rewritten with an explicit isin() mask instead of eval() on a
    # constructed expression string
    accept_codes = [0,10]
    for i in range(len(vars_data)):
        ok = df[vars_QC[i]].isin(accept_codes)
        df[vars_data[i]] = np.where(ok,df[vars_data[i]],np.nan)
    # drop the QC flag columns, keep only the data
    df = df[vars_data]
    ncFile.close()
    return df,d
start = modis_dt[0] end = modis_dt[-1] modis_dt_interp = [ result for result in perdelta(start, end, datetime.timedelta(minutes=ts)) ] modis_time_interp = netCDF4.date2num(modis_dt_interp, modis_time_units) modis_time_masked = numpy.ma.masked_where( numpy.ma.getmaskarray(evi_masked_median) == True, modis_time) modis_time_comp = numpy.ma.compressed(modis_time_masked) evi_masked_median_comp = numpy.ma.compressed(evi_masked_median) x_org = modis_time_comp y_org = evi_masked_median_comp # interpolate onto the tower time step interp_type = qcutils.get_keyvaluefromcf(cf, ["EVI"], "interp_type", default="linear") if interp_type.lower() not in ["linear", "smooth_interp"]: msg = " Unrecognised interpolation type (" + interp_type + "), using linear ..." log.warning(msg) interp_type = "linear" if interp_type.lower() == "linear": # linear interpolation log.info(" Using linear interpolation") f = scipy.interpolate.interp1d(x_org, y_org, bounds_error=False) evi_interp = f(modis_time_interp) filter_type = qcutils.get_keyvaluefromcf(cf, ["EVI"], "filter_type", default="savgol") if filter_type.lower() not in ["savgol"]: msg = " Unrecognised filter type (" + filter_type + "), using Savitsky-Golay ..."
cfname = cf_batch["Levels"][level][i] logging.info('Starting FluxNet output with '+cfname) cf = qcio.get_controlfilecontents(cfname) qcio.fn_write_csv(cf) logging.info('Finished FluxNet output with '+cfname) logging.info('') elif level.lower()=="concatenate": # concatenate netCDF files for i in cf_batch["Levels"][level].keys(): cfname = cf_batch["Levels"][level][i] logging.info('Starting concatenation with '+cfname) cf_cc = qcio.get_controlfilecontents(cfname) qcio.nc_concatenate(cf_cc) logging.info('Finished concatenation with '+cfname) # now plot the fingerprints for the concatenated files opt = qcutils.get_keyvaluefromcf(cf_cc,["Options"],"DoFingerprints", default="yes") if opt.lower()=="no": continue cf_fp = qcio.get_controlfilecontents("controlfiles/standard/fingerprint.txt") if "Files" not in dir(cf_fp): cf_fp["Files"] = {} file_name = cf_cc["Files"]["Out"]["ncFileName"] file_path = ntpath.split(file_name)[0]+"/" cf_fp["Files"]["file_path"] = file_path cf_fp["Files"]["in_filename"] = ntpath.split(file_name)[1] cf_fp["Files"]["plot_path"] = file_path[:file_path.index("Data")]+"Plots/" if "Options" not in cf_fp: cf_fp["Options"]={} cf_fp["Options"]["call_mode"] = "batch" cf_fp["Options"]["show_plots"] = "no" logging.info('Doing fingerprint plots using '+cf_fp["Files"]["in_filename"]) qcplot.plot_fingerprint(cf_fp) logging.info('Finished fingerprint plots') logging.info('')
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
     Reject data in a series wherever any of its precursor ("Source")
     series has a QC flag that is not OK (flag modulo 10 != 0).  Rejected
     points are masked and their QC flag set to the supplied code.
    Usage:
     do_dependencycheck(cf, ds, section, series)
    Author: PRI
    Date: Back in the day
    """
    # nothing to do if neither a section nor a series name was supplied
    if len(section) == 0 and len(series) == 0:
        return
    # find the control file section for the series if not given
    if len(section) == 0:
        section = qcutils.get_cfsection(cf, series=series, mode='quiet')
    # a DependencyCheck entry with a Source keyword is required
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series " + series + ", skipping ..."
        logger.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for " + series
        logger.info(msg)
    # precursor series named in the control file
    precursor_list = ast.literal_eval(cf[section][series]["DependencyCheck"]["Source"])
    # should missing precursor data be treated as acceptable?
    opt = qcutils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                     "ignore_missing", default="no")
    ignore_missing = opt.lower() in ["yes", "y", "true", "t"]
    # the series to be checked
    target_data, target_flag, target_attr = qcutils.GetSeries(ds, series)
    # loop over the precursor series
    for precursor in precursor_list:
        # skip precursors that are not in the data structure
        if precursor not in ds.series.keys():
            msg = " DependencyCheck: " + series + " precursor series " + precursor + " not found, skipping ..."
            logger.warning(msg)
            continue
        precursor_data, precursor_flag, precursor_attr = qcutils.GetSeries(ds, precursor)
        if ignore_missing:
            # zero the flag where the precursor data is the missing value so
            # that missing precursor data does not reject target data
            nrecs = int(ds.globalattributes["nc_nrecs"])
            missing = numpy.ones(nrecs) * float(c.missing_value)
            absent = numpy.where(numpy.isclose(precursor_data, missing) == True)[0]
            precursor_flag[absent] = numpy.int32(0)
        # reject the target wherever the precursor flag is not OK
        not_ok = numpy.mod(precursor_flag, 10) != 0
        target_data = numpy.ma.masked_where(not_ok, target_data)
        target_flag[numpy.ma.where(not_ok)[0]] = numpy.int32(code)
    # record the precursors used and write the result back
    target_attr["DependencyCheck_source"] = str(precursor_list)
    qcutils.CreateSeries(ds, series, target_data, target_flag, target_attr)
    # our work here is done
    return
def access_read_mfiles2(file_list,var_list=[]): f = ACCESSData() # check that we have a list of files to process if len(file_list)==0: print "access_read_mfiles: empty file_list received, returning ..." return f # make sure latitude and longitude are read if "lat" not in var_list: var_list.append("lat") if "lon" not in var_list: var_list.append("lon") # make sure valid_date and valid_time are read if "valid_date" not in var_list: var_list.append("valid_date") if "valid_time" not in var_list: var_list.append("valid_time") for file_name in file_list: # open the netCDF file ncfile = Dataset(file_name) # check the number of records dims = ncfile.dimensions shape = (len(dims["time"]),len(dims["lat"]),len(dims["lon"])) # move to the next file if this file doesn't have 25 time records if shape[0]!=25: print "access_read_mfiles: length of time dimension in "+file_name+" is "+str(shape[0])+" (expected 25)" continue # move to the next file if this file doesn't have 3 latitude records if shape[1]!=3: print "access_read_mfiles: length of lat dimension in "+file_name+" is "+str(shape[1])+" (expected 3)" continue # move to the next file if this file doesn't have 3 longitude records if shape[2]!=3: print "access_read_mfiles: length of lon dimension in "+file_name+" is "+str(shape[2])+" (expected 3)" continue # seems OK to continue with this file ... 
# add the file name to the file_list in the global attributes f.globalattr["file_list"].append(file_name) # get the global attributes for gattr in ncfile.ncattrs(): if gattr not in f.globalattr: f.globalattr[gattr] = getattr(ncfile,gattr) # if no variable list was passed to this routine, use all variables if len(var_list)==0: var_list=ncfile.variables.keys() # load the data into the data structure for var in var_list: # get the name of the variable in the ACCESS file access_name = qcutils.get_keyvaluefromcf(cf,["Variables",var],"access_name",default=var) # check that the requested variable exists in the ACCESS file if access_name in ncfile.variables.keys(): # check to see if the variable is already in the data structure if access_name not in f.variables.keys(): f.variables[access_name] = ncfile.variables[access_name][:] else: f.variables[access_name] = numpy.concatenate((f.variables[access_name],ncfile.variables[access_name][:]),axis=0) # now copy the variable attribiutes # create the variable attribute dictionary if access_name not in f.varattr: f.varattr[access_name] = {} # loop over the variable attributes for this_attr in ncfile.variables[access_name].ncattrs(): # check to see if the attribute has already if this_attr not in f.varattr[access_name].keys(): # add the variable attribute if it's not there already f.varattr[access_name][this_attr] = getattr(ncfile.variables[access_name],this_attr) else: print "access_read_mfiles: ACCESS variable "+access_name+" not found in "+file_name if access_name not in f.variables.keys(): f.variables[access_name] = makedummyseries(shape) else: f.variables[access_name] = numpy.concatenate((f.variables[access_name],makedummyseries(shape)),axis=0) # close the netCDF file ncfile.close() # return with the data structure return f
# now loop over the sies for site in site_list: # get the output file name if not os.path.exists(cf["Sites"][site]["out_filepath"]): os.makedirs(cf["Sites"][site]["out_filepath"]) out_filename = os.path.join(cf["Sites"][site]["out_filepath"], cf["Sites"][site]["out_filename"]) # get the metadata from the control file site_name = cf["Sites"][site]["site_name"] print " Processing " + site_name site_timezone = cf["Sites"][site]["site_timezone"] site_latitude = float(cf["Sites"][site]["site_latitude"]) site_longitude = float(cf["Sites"][site]["site_longitude"]) site_timestep = int(cf["Sites"][site]["site_timestep"]) site_sa_limit = qcutils.get_keyvaluefromcf(cf, ["Sites", site], "site_sa_limit", default=5) # index of the site in latitude dimension site_lat_index = int(((latitude[0] - site_latitude) / lat_resolution) + 0.5) erai_latitude = latitude[site_lat_index] # index of the site in longitude dimension if site_longitude < 0: site_longitude = float(360) + site_longitude site_lon_index = int(( (site_longitude - longitude[0]) / lon_resolution) + 0.5) erai_longitude = longitude[site_lon_index] print " Site coordinates: ", site_latitude, site_longitude print " ERAI grid: ", latitude[site_lat_index], longitude[ site_lon_index] # get an instance of the Datastructure ds_erai = qcio.DataStructure()
def CPD_run(cf):
    """
    Purpose:
     Assemble the data frame and options dictionary used by the change point
     detection (CPD) ustar threshold code from the netCDF file named in the
     control file.
    Usage:
     df, d = CPD_run(cf)
     where cf is a control file object
           df is a pandas DataFrame of the requested variables
           d is a dictionary of CPD run options
    """
    # set the input file and output paths, create directories as required
    path_out = cf['Files']['file_path']
    file_in = os.path.join(cf['Files']['file_path'], cf['Files']['in_filename'])
    # use out_filename if given, otherwise derive it from in_filename
    if "out_filename" in cf['Files']:
        file_out = os.path.join(cf['Files']['file_path'], cf['Files']['out_filename'])
    else:
        file_out = os.path.join(
            cf['Files']['file_path'],
            cf['Files']['in_filename'].replace(".nc", "_CPD.xls"))
    plot_path = "plots/"
    if "plot_path" in cf["Files"]:
        plot_path = os.path.join(cf["Files"]["plot_path"], "CPD/")
    if not os.path.isdir(plot_path):
        os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path):
        os.makedirs(results_path)
    # get a dictionary of the variable names, use AltVarName if given
    var_list = cf["Variables"].keys()
    names = {}
    for item in var_list:
        if "AltVarName" in cf["Variables"][item].keys():
            names[item] = cf["Variables"][item]["AltVarName"]
        else:
            names[item] = item
    # add the Excel datetime and year series
    names["xlDateTime"] = "xlDateTime"
    names["Year"] = "Year"
    # read the netcdf file
    logger.info(' Reading netCDF file ' + file_in)
    ds = qcio.nc_read_series(file_in)
    dates_list = ds.series["DateTime"]["Data"]
    # now get the data, with the missing value indicator replaced by NaN
    d = {}
    f = {}
    for item in names.keys():
        data, flag, attr = qcutils.GetSeries(ds, names[item])
        d[item] = np.where(data == c.missing_value, np.nan, data)
        f[item] = flag
    # set all data in a record to NaN if any flag is not 0 or 10
    # NOTE: the original code looped over the acceptable flag values but
    # tested (flag != 0) only, so data with the acceptable flag value of
    # 10 was also rejected
    for item in f.keys():
        idx = np.where((f[item] != 0) & (f[item] != 10))[0]
        if len(idx) != 0:
            for itemd in d.keys():
                d[itemd][idx] = np.nan
    df = pd.DataFrame(d, index=dates_list)
    # replace any remaining missing values with NaN
    # NOTE: DataFrame.replace returns a new object, the original code
    # discarded the result
    df = df.replace(c.missing_value, np.nan)
    # Build dictionary of additional configs
    d = {}
    d['radiation_threshold'] = int(cf['Options']['Fsd_threshold'])
    d['num_bootstraps'] = int(cf['Options']['Num_bootstraps'])
    d['flux_period'] = int(ds.globalattributes["time_step"])
    d['site_name'] = ds.globalattributes["site_name"]
    d["call_mode"] = qcutils.get_keyvaluefromcf(cf, ["Options"], "call_mode",
                                                default="interactive",
                                                mode="quiet")
    d["show_plots"] = qcutils.get_keyvaluefromcf(cf, ["Options"], "show_plots",
                                                 default=True, mode="quiet")
    d['plot_tclass'] = False
    if cf['Options']['Plot_TClass'] == 'True':
        d['plot_tclass'] = True
    if cf['Options']['Output_plots'] == 'True':
        d['plot_path'] = plot_path
    if cf['Options']['Output_results'] == 'True':
        d['results_path'] = results_path
    d["file_out"] = file_out
    return df, d
def l4qc(cf, ds3):
    """
    Purpose:
     Perform the L4 processing: gap fill the meteorological drivers using
     interpolation, climatology, alternate data sources and SOLO, then
     re-calculate the derived meteorological quantities.
    Usage:
     ds4 = l4qc(cf, ds3)
     where cf is a control file object
           ds3 is the L3 data structure
     Returns the L4 data structure; this is empty (logical false) if the
     copy from L3 failed, and is returned early if the alternate or SOLO
     gap filling asks to quit.
    """
    # !!! code here to use existing L4 file
    # logic
    # if the L4 doesn't exist
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is False
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is True
    #  - read the contents of the L4 netCDF file
    #  - check the start and end dates of the L3 and L4 data
    #  - if these are the same then tell the user there is nothing to do
    #  - copy the L3 data to the L4 data structure
    #  - replace the L3 data with the L4 data
    #ds4 = copy.deepcopy(ds3)
    ds4 = qcio.copy_datastructure(cf, ds3)
    # ds4 will be empty (logical false) if an error occurs in copy_datastructure
    # return from this routine if this is the case
    if not ds4:
        return ds4
    # set some attributes for this level
    qcutils.UpdateGlobalAttributes(cf, ds4, "L4")
    ds4.cf = cf
    # calculate the available energy
    if "Fa" not in ds4.series.keys():
        qcts.CalculateAvailableEnergy(ds4, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
    # create a dictionary to hold the gap filling data
    ds_alt = {}
    # check to see if we have any imports
    qcgf.ImportSeries(cf, ds4)
    # re-apply the quality control checks (range, diurnal and rules)
    qcck.do_qcchecks(cf, ds4)
    # now do the meteorological driver gap filling
    for ThisOne in cf["Drivers"].keys():
        if ThisOne not in ds4.series.keys():
            log.error("Series " + ThisOne + " not in data structure")
            continue
        # parse the control file for information on how the user wants to do the gap filling
        qcgf.GapFillParseControlFile(cf, ds4, ThisOne, ds_alt)
    # *** start of the section that does the gap filling of the drivers ***
    # fill short gaps using interpolation
    qcgf.GapFillUsingInterpolation(cf, ds4)
    # gap fill using climatology
    qcgf.GapFillFromClimatology(ds4)
    # do the gap filling using the ACCESS output
    qcgf.GapFillFromAlternate(cf, ds4, ds_alt)
    if ds4.returncodes["alternate"] == "quit":
        return ds4
    # gap fill using SOLO
    qcgf.GapFillUsingSOLO(cf, ds3, ds4)
    if ds4.returncodes["solo"] == "quit":
        return ds4
    # merge the first group of gap filled drivers into a single series
    qcts.MergeSeriesUsingDict(ds4, merge_order="prerequisite")
    # re-calculate the ground heat flux but only if requested in control file
    opt = qcutils.get_keyvaluefromcf(cf, ["Options"], "CorrectFgForStorage",
                                     default="No", mode="quiet")
    if opt.lower() != "no":
        qcts.CorrectFgForStorage(cf, ds4, Fg_out='Fg', Fg_in='Fg_Av',
                                 Ts_in='Ts', Sws_in='Sws')
    # re-calculate the net radiation
    qcts.CalculateNetRadiation(cf, ds4, Fn_out='Fn', Fsd_in='Fsd',
                               Fsu_in='Fsu', Fld_in='Fld', Flu_in='Flu')
    # re-calculate the available energy
    qcts.CalculateAvailableEnergy(ds4, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
    # merge the second group of gap filled drivers into a single series
    qcts.MergeSeriesUsingDict(ds4, merge_order="standard")
    # re-calculate the water vapour concentrations
    qcts.CalculateHumiditiesAfterGapFill(ds4)
    # re-calculate the meteorological variables
    qcts.CalculateMeteorologicalVariables(ds4)
    # the Tumba rhumba
    qcts.CalculateComponentsFromWsWd(ds4)
    # check for any missing data
    qcutils.get_missingingapfilledseries(ds4)
    # write the percentage of good data as a variable attribute
    qcutils.get_coverage_individual(ds4)
    # write the percentage of good data for groups
    qcutils.get_coverage_groups(ds4)
    return ds4
def gfMDS_createdict(cf, ds, series):
    """
    Purpose:
     Create an information dictionary for MDS gap filling from the contents
     of the control file.
    Usage:
     info["MDS"] = gfMDS_createdict(cf)
    Author: PRI
    Date: May 2018
    """
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error(
            "GapFillUsingMDS: Series %s not found in control file, skipping ...",
            series)
        return
    # create the MDS attribute (a dictionary) in ds, this will hold all MDS settings
    if "mds" not in dir(ds):
        ds.mds = {}
    # name of MDS output series in ds
    output_list = cf[section][series]["GapFillUsingMDS"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.mds[output] = {}
        # get the target, default to the series itself
        if "target" in cf[section][series]["GapFillUsingMDS"][output]:
            ds.mds[output]["target"] = cf[section][series]["GapFillUsingMDS"][
                output]["target"]
        else:
            ds.mds[output]["target"] = series
        # site name
        ds.mds[output]["site_name"] = ds.globalattributes["site_name"]
        # list of MDS settings
        # NOTE: mds_settings is parsed (and so validated) but not used further here
        if "mds_settings" in cf[section][series]["GapFillUsingMDS"][output]:
            mdss_list = ast.literal_eval(
                cf[section][series]["GapFillUsingMDS"][output]["mds_settings"])
        # list of drivers
        ds.mds[output]["drivers"] = ast.literal_eval(
            cf[section][series]["GapFillUsingMDS"][output]["drivers"])
        # list of tolerances
        ds.mds[output]["tolerances"] = ast.literal_eval(
            cf[section][series]["GapFillUsingMDS"][output]["tolerances"])
        # get the ustar filter option
        opt = qcutils.get_keyvaluefromcf(
            cf, [section, series, "GapFillUsingMDS", output],
            "turbulence_filter", default="")
        ds.mds[output]["turbulence_filter"] = opt
        # get the day/night filter option
        opt = qcutils.get_keyvaluefromcf(
            cf, [section, series, "GapFillUsingMDS", output],
            "daynight_filter", default="")
        ds.mds[output]["daynight_filter"] = opt
    # check that all requested targets and drivers have a mapping to
    # a FluxNet label, remove if they don't
    fluxnet_label_map = {
        "Fc": "NEE",
        "Fe": "LE",
        "Fh": "H",
        "Fsd": "SW_IN",
        "Ta": "TA",
        "VPD": "VPD"
    }
    # NOTE: iterate over a copy of the keys because entries may be deleted
    # below; the original iterated over ds.mds directly while deleting from
    # it, which raises "dictionary changed size during iteration"
    for mds_label in list(ds.mds.keys()):
        ds.mds[mds_label]["mds_label"] = mds_label
        pfp_target = ds.mds[mds_label]["target"]
        if pfp_target not in fluxnet_label_map:
            msg = " Target (" + pfp_target + ") not supported for MDS gap filling"
            logger.warning(msg)
            del ds.mds[mds_label]
            continue
        ds.mds[mds_label]["target_mds"] = fluxnet_label_map[pfp_target]
        # NOTE: iterate over a copy of the drivers list because unsupported
        # drivers are removed as we go; the original removed from the list
        # being iterated, which skips the element after each removal
        for pfp_driver in list(ds.mds[mds_label]["drivers"]):
            if pfp_driver not in fluxnet_label_map:
                msg = "Driver (" + pfp_driver + ") not supported for MDS gap filling"
                logger.warning(msg)
                ds.mds[mds_label]["drivers"].remove(pfp_driver)
            else:
                if "drivers_mds" not in ds.mds[mds_label]:
                    ds.mds[mds_label]["drivers_mds"] = []
                ds.mds[mds_label]["drivers_mds"].append(
                    fluxnet_label_map[pfp_driver])
        # drop the output entirely if no supported drivers remain
        if len(ds.mds[mds_label]["drivers"]) == 0:
            del ds.mds[mds_label]
    return
index = numpy.ma.where(quality==item)[0] ok_mask[index] = 0 evi_masked = numpy.ma.masked_where(ok_mask!=0,evi) evi_masked_median = numpy.ma.median(evi_masked.reshape(evi_masked.shape[0],-1),axis=1) # get data for interpolation start = modis_dt[0] end = modis_dt[-1] modis_dt_interp = [result for result in perdelta(start,end,datetime.timedelta(minutes=ts))] modis_time_interp = netCDF4.date2num(modis_dt_interp,modis_time_units) modis_time_masked = numpy.ma.masked_where(numpy.ma.getmaskarray(evi_masked_median)==True,modis_time) modis_time_comp = numpy.ma.compressed(modis_time_masked) evi_masked_median_comp = numpy.ma.compressed(evi_masked_median) x_org = modis_time_comp y_org = evi_masked_median_comp # interpolate onto the tower time step interp_type = qcutils.get_keyvaluefromcf(cf,["EVI"],"interp_type",default="linear") if interp_type.lower() not in ["linear","smooth_interp"]: msg = " Unrecognised interpolation type ("+interp_type+"), using linear ..." log.warning(msg) interp_type = "linear" if interp_type.lower()=="linear": # linear interpolation log.info(" Using linear interpolation") f = scipy.interpolate.interp1d(x_org,y_org,bounds_error=False) evi_interp = f(modis_time_interp) filter_type = qcutils.get_keyvaluefromcf(cf,["EVI"],"filter_type",default="savgol") if filter_type.lower() not in ["savgol"]: msg = " Unrecognised filter type ("+filter_type+"), using Savitsky-Golay ..." log.warning(msg) filter_type = "savgol" if filter_type.lower()=="savgol":
def get_accessdata(cf,ds_60minutes,f,info):
    """
    Purpose:
     Transfer ACCESS model output from the file data structure f into the
     60 minute data structure, converting UTC timestamps to site local
     standard time and dropping records with duplicated timestamps.
    Usage:
     get_accessdata(cf, ds_60minutes, f, info)
     where cf is a control file object
           ds_60minutes is the data structure to be filled
           f is the data structure returned by the ACCESS file reader
           info is a dictionary containing the site time zone ("site_tz")
    """
    # latitude and longitude, chose central pixel of 3x3 grid
    ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
    ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
    # list of variables to process
    var_list = cf["Variables"].keys()
    # get a series of Python datetimes and put this into the data structure
    valid_date = f.variables["valid_date"][:]
    nRecs = len(valid_date)
    valid_time = f.variables["valid_time"][:]
    # combine valid_date (YYYYMMDD) and valid_time (HHMM) into datetimes
    dl = [datetime.datetime.strptime(str(int(valid_date[i])*10000+int(valid_time[i])),"%Y%m%d%H%M") for i in range(0,nRecs)]
    dt_utc_all = numpy.array(dl)
    # find records with duplicated timestamps (time step of 0 seconds)
    time_step = numpy.array([(dt_utc_all[i]-dt_utc_all[i-1]).total_seconds() for i in range(1,len(dt_utc_all))])
    time_step = numpy.append(time_step,3600)
    idxne0 = numpy.where(time_step!=0)[0]
    idxeq0 = numpy.where(time_step==0)[0]
    idx_clipped = numpy.where((idxeq0>0)&(idxeq0<nRecs))[0]
    idxeq0 = idxeq0[idx_clipped]
    # drop the duplicates and convert from UTC to site local time
    dt_utc = dt_utc_all[idxne0]
    dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_utc]
    dt_loc = [x.astimezone(info["site_tz"]) for x in dt_utc]
    # remove any daylight saving offset so times are local standard time
    dt_loc = [x-x.dst() for x in dt_loc]
    dt_loc = [x.replace(tzinfo=None) for x in dt_loc]
    flag = numpy.zeros(len(dt_loc),dtype=numpy.int32)
    ds_60minutes.series["DateTime"] = {}
    ds_60minutes.series["DateTime"]["Data"] = dt_loc
    ds_60minutes.series["DateTime"]["Flag"] = flag
    ds_60minutes.series["DateTime_UTC"] = {}
    ds_60minutes.series["DateTime_UTC"]["Data"] = dt_utc
    ds_60minutes.series["DateTime_UTC"]["Flag"] = flag
    nRecs = len(ds_60minutes.series["DateTime"]["Data"])
    ds_60minutes.globalattributes["nc_nrecs"] = nRecs
    # we're done with valid_date and valid_time, drop them from the variable list
    for item in ["valid_date","valid_time","lat","lon"]:
        if item in var_list:
            var_list.remove(item)
    # create the QC flag with all zeros
    nRecs = ds_60minutes.globalattributes["nc_nrecs"]
    flag_60minutes = numpy.zeros(nRecs,dtype=numpy.int32)
    # get the UTC hour
    hr_utc = [x.hour for x in dt_utc]
    attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    qcutils.CreateSeries(ds_60minutes,'Hr_UTC',hr_utc,Flag=flag_60minutes,Attr=attr)
    # now loop over the variables listed in the control file
    for label in var_list:
        # get the name of the variable in the ACCESS file
        access_name = qcutils.get_keyvaluefromcf(cf,["Variables",label],"access_name",default=label)
        # warn the user if the variable not found
        if access_name not in f.variables.keys():
            msg = "Requested variable "+access_name
            msg = msg+" not found in ACCESS data"
            logging.error(msg)
            continue
        # check the variable dimensions once, before the grid loops
        # NOTE: the original logged the "unrecognised dimension" message
        # inside the grid loops and then fell through, re-using a stale
        # "series" value from a previous iteration instead of skipping
        nDims = len(f.variables[access_name].shape)
        if nDims not in [3,4]:
            msg = "Unrecognised variable ("+label
            msg = msg+") dimension in ACCESS file"
            logging.error(msg)
            continue
        # get the variable attibutes
        attr = get_variableattributes(f,access_name)
        # loop over the 3x3 matrix of ACCESS grid data supplied
        for i in range(0,3):
            for j in range(0,3):
                label_ij = label+'_'+str(i)+str(j)
                if nDims==3:
                    series = f.variables[access_name][:,i,j]
                else:
                    # 4 dimensions, use the first level of the extra dimension
                    series = f.variables[access_name][:,0,i,j]
                # drop the records with duplicated timestamps
                series = series[idxne0]
                qcutils.CreateSeries(ds_60minutes,label_ij,series,
                                     Flag=flag_60minutes,Attr=attr)
    return
def l4qc(cf,ds3):
    """
    Purpose:
     Perform the L4 processing: gap fill the meteorological drivers using
     interpolation, climatology, alternate data sources and SOLO, then
     re-calculate the derived meteorological quantities.
    Usage:
     ds4 = l4qc(cf, ds3)
     where cf is a control file object
           ds3 is the L3 data structure
     Returns the L4 data structure; this is empty (logical false) if the
     copy from L3 failed, and is returned early if the alternate or SOLO
     gap filling asks to quit.
    """
    # !!! code here to use existing L4 file
    # logic
    # if the L4 doesn't exist
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is False
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is True
    #  - read the contents of the L4 netCDF file
    #  - check the start and end dates of the L3 and L4 data
    #  - if these are the same then tell the user there is nothing to do
    #  - copy the L3 data to the L4 data structure
    #  - replace the L3 data with the L4 data
    #ds4 = copy.deepcopy(ds3)
    ds4 = qcio.copy_datastructure(cf,ds3)
    # ds4 will be empty (logical false) if an error occurs in copy_datastructure
    # return from this routine if this is the case
    if not ds4: return ds4
    # set some attributes for this level
    qcutils.UpdateGlobalAttributes(cf,ds4,"L4")
    ds4.cf = cf
    # calculate the available energy
    if "Fa" not in ds4.series.keys():
        qcts.CalculateAvailableEnergy(ds4,Fa_out='Fa',Fn_in='Fn',Fg_in='Fg')
    # create a dictionary to hold the gap filling data
    ds_alt = {}
    # check to see if we have any imports
    qcgf.ImportSeries(cf,ds4)
    # re-apply the quality control checks (range, diurnal and rules)
    qcck.do_qcchecks(cf,ds4)
    # now do the meteorological driver gap filling
    for ThisOne in cf["Drivers"].keys():
        if ThisOne not in ds4.series.keys():
            log.error("Series "+ThisOne+" not in data structure"); continue
        # parse the control file for information on how the user wants to do the gap filling
        qcgf.GapFillParseControlFile(cf,ds4,ThisOne,ds_alt)
    # *** start of the section that does the gap filling of the drivers ***
    # fill short gaps using interpolation
    qcgf.GapFillUsingInterpolation(cf,ds4)
    # gap fill using climatology
    qcgf.GapFillFromClimatology(ds4)
    # do the gap filling using the ACCESS output
    qcgf.GapFillFromAlternate(cf,ds4,ds_alt)
    if ds4.returncodes["alternate"]=="quit": return ds4
    # gap fill using SOLO
    qcgf.GapFillUsingSOLO(cf,ds3,ds4)
    if ds4.returncodes["solo"]=="quit": return ds4
    # merge the first group of gap filled drivers into a single series
    qcts.MergeSeriesUsingDict(ds4,merge_order="prerequisite")
    # re-calculate the ground heat flux but only if requested in control file
    opt = qcutils.get_keyvaluefromcf(cf,["Options"],"CorrectFgForStorage",default="No",mode="quiet")
    if opt.lower()!="no": qcts.CorrectFgForStorage(cf,ds4,Fg_out='Fg',Fg_in='Fg_Av',Ts_in='Ts',Sws_in='Sws')
    # re-calculate the net radiation
    qcts.CalculateNetRadiation(cf,ds4,Fn_out='Fn',Fsd_in='Fsd',Fsu_in='Fsu',Fld_in='Fld',Flu_in='Flu')
    # re-calculate the available energy
    qcts.CalculateAvailableEnergy(ds4,Fa_out='Fa',Fn_in='Fn',Fg_in='Fg')
    # merge the second group of gap filled drivers into a single series
    qcts.MergeSeriesUsingDict(ds4,merge_order="standard")
    # re-calculate the water vapour concentrations
    qcts.CalculateHumiditiesAfterGapFill(ds4)
    # re-calculate the meteorological variables
    qcts.CalculateMeteorologicalVariables(ds4)
    # the Tumba rhumba
    qcts.CalculateComponentsFromWsWd(ds4)
    # check for any missing data
    qcutils.get_missingingapfilledseries(ds4)
    # write the percentage of good data as a variable attribute
    qcutils.get_coverage_individual(ds4)
    # write the percentage of good data for groups
    qcutils.get_coverage_groups(ds4)
    return ds4
ds_60minutes.globalattributes["nc_nrecs"] = nRecs # processing level ds_60minutes.globalattributes["nc_level"] = "L1" # latitude and longitude, chose central pixel of 3x3 grid ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1] ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1] # put the ACCESS data into the 60 minute data structure ds_60minutes # make a QC flag with a value of 0 flag_60minutes = numpy.zeros(nRecs) # loop over the variables defined in the control file for item in ["valid_date", "valid_time", "lat", "lon"]: if item in var_list: var_list.remove(item) for var in var_list: # get the name of the ACCESS variable access_name = qcutils.get_keyvaluefromcf(cf, ["Variables", var], "access_name", default=var) if access_name not in f.variables.keys(): logging.error("Requested variable " + access_name + " not found in ACCESS data") continue attr = {} for this_attr in f.varattr[access_name].keys(): attr[this_attr] = f.varattr[access_name][this_attr] attr["missing_value"] = c.missing_value # loop over all ACCESS grids and give them standard OzFlux names with the grid idices appended for i in range(0, 3): for j in range(0, 3): if len(f.variables[access_name].shape) == 3: var_ij = var + '_' + str(i) + str(j) series = f.variables[access_name][:, i, j]