def get_data_dict(ds, configs_dict):
    """
    Purpose:
     Collect the series used by the respiration/light response code into
     a plain dictionary.
    Usage:
     data = get_data_dict(ds, configs_dict)
     where ds is a data structure
           configs_dict is a dictionary that provides
             configs_dict["target"]     - label of the series stored as "NEE"
             configs_dict["drivers"][0] - label of the series stored as "TempC"
    Returns:
     A dictionary with the keys "NEE", "PAR", "TempC", "VPD", "ustar"
     and "date_time".
    """

    def _nan_where_rejected(values, flags):
        # NaN wherever the QC flag is non-zero or the value is the missing value
        rejected = (flags != 0) | (values == c.missing_value)
        return numpy.where(rejected, numpy.nan, values)

    # NOTE: series are ndarrays not masked arrays
    target_label = configs_dict["target"]
    temperature_label = configs_dict["drivers"][0]
    # the third item returned by GetSeries is not used by this function
    Fc, Fc_flag, _ = pfp_utils.GetSeries(ds, "Fc")
    ER, ER_flag, _ = pfp_utils.GetSeries(ds, target_label)
    Fsd, Fsd_flag, _ = pfp_utils.GetSeries(ds, "Fsd")
    T, T_flag, _ = pfp_utils.GetSeries(ds, temperature_label)
    VPD, VPD_flag, _ = pfp_utils.GetSeries(ds, "VPD")
    ustar, ustar_flag, _ = pfp_utils.GetSeries(ds, "ustar")
    # replace c.missing_value, and any value with a non-zero flag, with numpy.nan
    # NOTE: Fc is cleaned here but is not stored in the output, "NEE" is
    #       taken from the target series
    Fc = _nan_where_rejected(Fc, Fc_flag)
    ustar = _nan_where_rejected(ustar, ustar_flag)
    ER = _nan_where_rejected(ER, ER_flag)
    # NOTE: Fsd, T and VPD are passed through as read, their QC flags and
    #       missing values are not screened in this function
    # put the data in the dictionary
    # NOTE(review): 0.46 and 4.6 presumably convert Fsd to PAR (PAR fraction
    #               and W/m2 to umol/m2/s) - confirm
    data = {
        "NEE": ER,
        "PAR": Fsd * 0.46 * 4.6,
        "TempC": T,
        "VPD": VPD,
        "ustar": ustar,
        "date_time": numpy.array(ds.series["DateTime"]["Data"]),
    }
    return data
def CPD_run(cf):
    """
    Purpose:
     Read the netCDF file named in the control file and build the inputs
     for the change point detection (CPD) code.
    Usage:
     df, d = CPD_run(cf)
     where cf is a control file object with "Files", "Variables" and
           "Options" sections
    Returns:
     df - pandas DataFrame of the requested variables plus a "Year" column,
          indexed by the datetime shifted back by one time step
     d  - dictionary of run options (thresholds, paths, plot switches)
    Side effects:
     Creates the plot directory and the results directory if they do not
     already exist.
    """
    # Set input file and output path and create directories for plots and results
    files = cf['Files']
    path_out = files['file_path']
    file_in = os.path.join(path_out, files['in_filename'])
    if "out_filename" in files:
        file_out = os.path.join(path_out, files['out_filename'])
    else:
        file_out = os.path.join(path_out, files['in_filename'].replace(".nc", "_CPD.xls"))
    plot_path = "plots/"
    if "plot_path" in cf["Files"]:
        plot_path = os.path.join(cf["Files"]["plot_path"], "CPD/")
    if not os.path.isdir(plot_path):
        os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path):
        os.makedirs(results_path)
    # get a dictionary of the variable names, using the alternate name
    # from the control file when one is given
    names = {}
    for item in cf["Variables"].keys():
        if "AltVarName" in cf["Variables"][item].keys():
            names[item] = cf["Variables"][item]["AltVarName"]
        else:
            names[item] = item
    # read the netcdf file
    logger.info(' Reading netCDF file '+file_in)
    ds = pfp_io.nc_read_series(file_in)
    ts = int(ds.globalattributes["time_step"])
    # get the datetime
    dt = ds.series["DateTime"]["Data"]
    # adjust the datetime so that the last time period in a year is correctly assigned.
    # e.g. last period for 2013 is 2014-01-01 00:00, here we make the year 2013
    dt = dt - datetime.timedelta(minutes=ts)
    # now get the data
    d = {}
    f = {}
    for item in names.keys():
        data, flag, attr = pfp_utils.GetSeries(ds, names[item])
        d[item] = np.where(data == c.missing_value, np.nan, data)
        f[item] = flag
    # set all data to NaNs if any flag not 0 or 10
    # NOTE: the previous version tested "flag != 0" only, which also
    #       rejected records flagged 10
    for item in f.keys():
        idx = np.where((f[item] != 0) & (f[item] != 10))[0]
        if len(idx) != 0:
            for itemd in d.keys():
                d[itemd][idx] = np.nan
    d["Year"] = np.array([ldt.year for ldt in dt])
    df = pd.DataFrame(d, index=dt)
    # replace missing values with NaN
    # NOTE: DataFrame.replace returns a new frame, the result must be kept
    df = df.replace(c.missing_value, np.nan)
    # Build dictionary of additional configs
    d = {}
    d['radiation_threshold'] = int(cf['Options']['Fsd_threshold'])
    d['num_bootstraps'] = int(cf['Options']['Num_bootstraps'])
    d['flux_period'] = ts
    d['site_name'] = ds.globalattributes["site_name"]
    d["call_mode"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "call_mode",
                                                  default="interactive", mode="quiet")
    d["show_plots"] = pfp_utils.get_optionskeyaslogical(cf, "show_plots", default=True)
    d['plot_tclass'] = False
    if cf['Options']['Plot_TClass'] == 'True':
        d['plot_tclass'] = True
    # the output paths are only passed on when the matching option is 'True'
    if cf['Options']['Output_plots'] == 'True':
        d['plot_path'] = plot_path
    if cf['Options']['Output_results'] == 'True':
        d['results_path'] = results_path
    d["file_out"] = file_out
    return df, d
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
     Mask a series, and set its QC flag, wherever any of its precursor
     series has a QC flag that is not a multiple of 10.
     The precursor series are listed in the "Source" key of the
     "DependencyCheck" sub-section for the series in the control file.
     If the "ignore_missing" key is set to yes/y/true/t, precursor values
     equal to the missing value do not cause the series to be masked.
    Usage:
     do_dependencycheck(cf, ds, section, series, code=23, mode="quiet")
     where cf is a control file object
           ds is a data structure
           section is the control file section containing series, if
                   empty it is looked up with pfp_utils.get_cfsection
           series is the label of the dependent series
           code is the QC flag value written where a precursor is not OK
           mode is "verbose" to log a message when the check is done
    Side effects:
     The series in ds is replaced via pfp_utils.CreateSeries and the
     attribute "DependencyCheck_source" is added to it.
    Author: PRI
    Date: Back in the day
    """
    if len(section) == 0 and len(series) == 0:
        return
    if len(section) == 0:
        section = pfp_utils.get_cfsection(cf, series=series, mode='quiet')
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series " + series + ", skipping ..."
        logger.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for " + series
        logger.info(msg)
    # get the precursor source list from the control file
    source_list = ast.literal_eval(
        cf[section][series]["DependencyCheck"]["Source"])
    # check to see if the "ignore_missing" flag is set
    opt = pfp_utils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                       "ignore_missing", default="no")
    ignore_missing = opt.lower() in ["yes", "y", "true", "t"]
    # the missing value as a float, used when ignoring missing precursor data
    missing_value = float(c.missing_value)
    # get the data
    dependent_data, dependent_flag, dependent_attr = pfp_utils.GetSeries(
        ds, series)
    # loop over the precursor source list
    for item in source_list:
        # check the precursor is in the data structure
        if item not in ds.series.keys():
            msg = " DependencyCheck: " + series + " precursor series " + item + " not found, skipping ..."
            logger.warning(msg)
            continue
        # get the precursor data
        precursor_data, precursor_flag, precursor_attr = pfp_utils.GetSeries(
            ds, item)
        # check if the user wants to ignore missing precursor data
        if ignore_missing:
            # they do, so find the elements equal to the missing value
            is_missing = numpy.isclose(precursor_data, missing_value)
            # and set these flags to 0 so missing data is ignored
            # NOTE(review): this edits the array returned by GetSeries, it is
            #               assumed to be a copy of the data in ds - confirm
            precursor_flag[numpy.where(is_missing)[0]] = numpy.int32(0)
        # precursor data is not OK where the flag is not a multiple of 10
        precursor_not_ok = numpy.mod(precursor_flag, 10) != 0
        # mask the dependent data where the precursor flag shows data not OK
        dependent_data = numpy.ma.masked_where(precursor_not_ok, dependent_data)
        # get an index where the precursor flag shows data not OK
        idx = numpy.ma.where(precursor_not_ok)[0]
        # set the dependent QC flag
        dependent_flag[idx] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    pfp_utils.CreateSeries(ds, series, dependent_data, dependent_flag,
                           dependent_attr)
    # our work here is done
    return