def do_L2_batch(main_ui, cf_level):
    for i in list(cf_level.keys()):
        # check the stop flag
        if main_ui.stop_flag:
            # break out of the loop if user requested stop
            break
        cf_file_name = os.path.split(cf_level[i])
        msg = "Starting L2 processing with " + cf_file_name[1]
        logger.info(msg)
        if not check_file_exits(cf_level[i]):
            return 0
        try:
            cf_l2 = pfp_io.get_controlfilecontents(cf_level[i])
            if not pfp_compliance.l2_update_controlfile(cf_l2):
                continue
            if "Options" not in cf_l2:
                cf_l2["Options"] = {}
            cf_l2["Options"]["call_mode"] = "batch"
            cf_l2["Options"]["show_plots"] = "No"
            infilename = pfp_io.get_infilenamefromcf(cf_l2)
            ds1 = pfp_io.NetCDFRead(infilename)
            if ds1.info["returncodes"]["value"] != 0:
                return
            ds2 = pfp_levels.l2qc(cf_l2, ds1)
            outfilename = pfp_io.get_outfilenamefromcf(cf_l2)
            pfp_io.NetCDFWrite(outfilename, ds2)
            msg = "Finished L2 processing with " + cf_file_name[1]
            logger.info(msg)
            if "Plots" in list(cf_l2.keys()):
                logger.info("Plotting L1 and L2 data")
                for nFig in list(cf_l2["Plots"].keys()):
                    if "(disabled)" in nFig:
                        continue
                    plt_cf = cf_l2["Plots"][str(nFig)]
                    if "type" in plt_cf.keys():
                        if str(plt_cf["type"]).lower() == "xy":
                            pfp_plot.plotxy(cf_l2, nFig, plt_cf, ds1, ds2)
                        else:
                            pfp_plot.plottimeseries(cf_l2, nFig, ds1, ds2)
                    else:
                        pfp_plot.plottimeseries(cf_l2, nFig, ds1, ds2)
                logger.info("Finished plotting L1 and L2 data")
            logger.info("")
        except Exception:
            msg = "Error occurred during L2 processing " + cf_file_name[1]
            logger.error(msg)
            error_message = traceback.format_exc()
            logger.error(error_message)
            continue
    return 1
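For orientation, here is a minimal sketch of how do_L2_batch might be driven. The control file paths are hypothetical, and SimpleNamespace stands in for the batch GUI object, which only needs a stop_flag attribute in this function.

# minimal sketch of driving do_L2_batch; the control file paths are
# hypothetical, SimpleNamespace stands in for the batch GUI object
from types import SimpleNamespace

main_ui = SimpleNamespace(stop_flag=False)
cf_level = {"0": "controlfiles/Calperum_L2.txt",
            "1": "controlfiles/Sturt_L2.txt"}
result = do_L2_batch(main_ui, cf_level)
print("L2 batch completed" if result == 1 else "L2 batch stopped early")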
def mpt_main(cf):
    base_file_path = cf["Files"]["file_path"]
    nc_file_name = cf["Files"]["in_filename"]
    nc_file_path = os.path.join(base_file_path, nc_file_name)
    ds = pfp_io.NetCDFRead(nc_file_path)
    if ds.info["returncodes"]["value"] != 0:
        return
    # get a temporary directory for the log, input and output files
    tmp_dir = tempfile.TemporaryDirectory(prefix="pfp_mpt_")
    mpt = {"paths": {"tmp_base": tmp_dir.name}}
    for item in ["input", "output", "log"]:
        path = os.path.join(tmp_dir.name, item)
        os.makedirs(path)
        mpt["paths"][item] = path
    out_file_paths = run_mpt_code(cf, ds, mpt)
    if len(out_file_paths) == 0:
        return
    ustar_results = read_mpt_output(out_file_paths)
    mpt_file_path = nc_file_path.replace(".nc", "_MPT.xlsx")
    xl_write_mpt(mpt_file_path, ustar_results)
    return
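mpt_main only reads the [Files] section of the control file; sketched here as the equivalent nested dict, with a hypothetical path and file name.

# equivalent nested dict for the [Files] section that mpt_main reads;
# the path and file name are hypothetical
cf = {"Files": {"file_path": "/data/Calperum/Processed",
                "in_filename": "Calperum_L3.nc"}}
mpt_main(cf)
# on success this writes /data/Calperum/Processed/Calperum_L3_MPT.xlsx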
def do_L5_batch(main_ui, cf_level):
    sites = sorted(list(cf_level.keys()), key=int)
    for i in sites:
        # check the stop flag
        if main_ui.stop_flag:
            # break out of the loop if user requested stop
            break
        cf_file_name = os.path.split(cf_level[i])
        msg = "Starting L5 processing with " + cf_file_name[1]
        logger.info(msg)
        if not check_file_exits(cf_level[i]):
            return 0
        try:
            cf_l5 = pfp_io.get_controlfilecontents(cf_level[i])
            if not pfp_compliance.l5_update_controlfile(cf_l5):
                continue
            if "Options" not in cf_l5:
                cf_l5["Options"] = {}
            cf_l5["Options"]["call_mode"] = "batch"
            cf_l5["Options"]["show_plots"] = "No"
            infilename = pfp_io.get_infilenamefromcf(cf_l5)
            ds4 = pfp_io.NetCDFRead(infilename)
            if ds4.info["returncodes"]["value"] != 0:
                return
            ds5 = pfp_levels.l5qc(None, cf_l5, ds4)
            outfilename = pfp_io.get_outfilenamefromcf(cf_l5)
            pfp_io.NetCDFWrite(outfilename, ds5)
            msg = "Finished L5 processing with " + cf_file_name[1]
            logger.info(msg)
            # do the CF compliance check
            #do_batch_cfcheck(cf_l5)
            # plot the L5 fingerprints
            do_batch_fingerprints(cf_l5)
            logger.info("")
        except Exception:
            msg = "Error occurred during L5 with " + cf_file_name[1]
            logger.error(msg)
            error_message = traceback.format_exc()
            logger.error(error_message)
            continue
    return 1
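The key=int in the sort matters when the control file keys are numeric strings, because a plain lexicographic sort puts "10" before "2":

# why do_L5_batch sorts the control file keys with key=int
print(sorted(["1", "10", "2"]))           # ['1', '10', '2'] (lexicographic)
print(sorted(["1", "10", "2"], key=int))  # ['1', '2', '10'] (numeric)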
def open_netcdf_file(self):
    file_uri = self.file.filepath()
    # close the netCDF file
    self.file.close()
    # read the netCDF file to a data structure
    self.ds = pfp_io.NetCDFRead(file_uri, checktimestep=False)
    if self.ds.info["returncodes"]["value"] != 0:
        return
    # display the netCDF file in the GUI
    self.tabs.tab_dict[self.tabs.tab_index_all] = pfp_gui.file_explore(self)
    # return if something went wrong
    if self.tabs.tab_dict[self.tabs.tab_index_all].ds.info["returncodes"]["value"] != 0:
        return
    # add a tab for the netCDF file contents
    tab_title = os.path.basename(self.ds.info["filepath"])
    self.tabs.addTab(self.tabs.tab_dict[self.tabs.tab_index_all], tab_title)
    self.tabs.setCurrentIndex(self.tabs.tab_index_all)
    self.tabs.tab_index_all = self.tabs.tab_index_all + 1
    return
logger.info(msg)
#bp_site_names = ["AdelaideRiver"]
for bp_site_name in bp_site_names:
    idx = processing_info["BP name"].index(bp_site_name)
    csep_site_name = processing_info["CSEP name"][idx]
    source_path = os.path.join(bp_base, bp_site_name, "Data", "Processed")
    os.chdir(source_path)
    msg = "In " + os.getcwd()
    logger.info(msg)
    for level in ["L3", "L4", "L5", "L6"]:
        source_name = bp_site_name + "_" + level + ".nc"
        if not os.path.isfile(source_name):
            msg = " File " + source_name + " not found"
            logger.error(msg)
            continue
        ds = pfp_io.NetCDFRead(source_name)
        ldt = pfp_utils.GetVariable(ds, "DateTime")
        start_datetime = ldt["Data"][0]
        end_datetime = ldt["Data"][-1]
        file_datetime = "_" + start_datetime.strftime("%Y%m%d") + "_"
        file_datetime += end_datetime.strftime("%Y%m%d")
        destination_name = source_name.replace(".nc", file_datetime + ".nc")
        csep_path = os.path.join(csep_base, csep_site_name, "Data", "Flux", "Processed")
        csep_uri = os.path.join(csep_path, level, "default", destination_name)
        msg = " Copying " + source_name + " to " + destination_name
        logger.info(msg)
        start = time.time()
        rclone_cmd = ["rclone", "copyto", source_name, csep_uri]
        subprocess.call(rclone_cmd)
        #logger.info(" ".join(rclone_cmd))
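A worked example of the destination name built above, with made-up start and end timestamps:

import datetime
# hypothetical timestamps for the first and last records of the file
start_datetime = datetime.datetime(2007, 10, 17, 0, 30)
end_datetime = datetime.datetime(2009, 5, 25, 0, 0)
file_datetime = "_" + start_datetime.strftime("%Y%m%d") + "_"
file_datetime += end_datetime.strftime("%Y%m%d")
print("AdelaideRiver_L3.nc".replace(".nc", file_datetime + ".nc"))
# AdelaideRiver_L3_20071017_20090525.nc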
def CPD_run(cf):
    # Set input file and output path and create directories for plots and results
    path_out = cf["Files"]["file_path"]
    file_in = os.path.join(cf["Files"]["file_path"], cf["Files"]["in_filename"])
    #
    if "out_filename" in cf["Files"]:
        file_out = os.path.join(cf["Files"]["file_path"], cf["Files"]["out_filename"])
    else:
        file_name = cf["Files"]["in_filename"].replace(".nc", "_CPD_McHugh.xlsx")
        file_out = os.path.join(cf["Files"]["file_path"], file_name)
    plot_path = pfp_utils.get_keyvaluefromcf(cf, ["Files"], "plot_path", default="plots/")
    plot_path = os.path.join(plot_path, "CPD", "")
    if not os.path.isdir(plot_path):
        os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path):
        os.makedirs(results_path)
    # get a dictionary of the variable names
    var_list = list(cf["Variables"].keys())
    names = {}
    for item in var_list:
        if "name" in list(cf["Variables"][item].keys()):
            names[item] = cf["Variables"][item]["name"]
        else:
            names[item] = item
    # read the netCDF file
    ds = pfp_io.NetCDFRead(file_in)
    if ds.info["returncodes"]["value"] != 0:
        return
    ts = int(float(ds.root["Attributes"]["time_step"]))
    # get the datetime
    dt = ds.root["Variables"]["DateTime"]["Data"]
    # adjust the datetime so that the last time period in a year is correctly assigned,
    # e.g. the last period for 2013 is 2014-01-01 00:00, here we make the year 2013
    dt = dt - datetime.timedelta(minutes=ts)
    # now get the data
    d = {}
    f = {}
    for item in list(names.keys()):
        msg = " CPD (McHugh): Using variable " + names[item] + " for " + item
        logger.info(msg)
        var = pfp_utils.GetVariable(ds, names[item])
        d[item] = np.ma.filled(var["Data"], np.nan)
        f[item] = var["Flag"]
    # set all data to NaN where any flag is not 0 or 10
    for item in list(f.keys()):
        idx = np.where((f[item] != 0) & (f[item] != 10))[0]
        if len(idx) != 0:
            for itemd in list(d.keys()):
                d[itemd][idx] = np.nan
    d["Year"] = np.array([ldt.year for ldt in dt])
    df = pd.DataFrame(d, index=dt)
    # replace missing values with NaN (DataFrame.replace returns a copy)
    df = df.replace(c.missing_value, np.nan)
    # build a dictionary of additional configs
    d = {}
    d["radiation_threshold"] = int(cf["Options"]["Fsd_threshold"])
    d["num_bootstraps"] = int(cf["Options"]["Num_bootstraps"])
    d["flux_period"] = int(float(ds.root["Attributes"]["time_step"]))
    d["site_name"] = ds.root["Attributes"]["site_name"]
    d["call_mode"] = pfp_utils.get_keyvaluefromcf(cf, ["Options"], "call_mode",
                                                  default="interactive", mode="quiet")
    d["show_plots"] = pfp_utils.get_optionskeyaslogical(cf, "show_plots", default=True)
    d["plot_tclass"] = False
    if cf["Options"]["Plot_TClass"] == "True":
        d["plot_tclass"] = True
    if cf["Options"]["Output_plots"] == "True":
        d["plot_path"] = plot_path
    if cf["Options"]["Output_results"] == "True":
        d["results_path"] = results_path
        d["file_out"] = file_out
    return df, d
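The QC-flag mask above now matches its comment (the original looped over f_OK but only ever tested != 0, so flag 10 data were NaN'd along with everything else). A standalone check of the masking logic, with made-up flag values:

import numpy as np
# made-up flags: 0 and 10 are "OK", everything else should be NaN'd
flags = np.array([0, 10, 20, 0, 30])
data = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
idx = np.where((flags != 0) & (flags != 10))[0]
data[idx] = np.nan
print(data)  # [ 1.  2. nan  4. nan]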
def climatology(cf):
    nc_filename = pfp_io.get_infilenamefromcf(cf)
    if not pfp_utils.file_exists(nc_filename):
        msg = " Unable to find netCDF file " + nc_filename
        logger.error(msg)
        return
    xl_filename = nc_filename.replace(".nc", "_Climatology.xls")
    xlFile = xlwt.Workbook()
    ds = pfp_io.NetCDFRead(nc_filename)
    if ds.info["returncodes"]["value"] != 0:
        return
    # get the time step
    ts = int(ds.root["Attributes"]["time_step"])
    # get the datetime series
    dt = ds.root["Variables"]["DateTime"]["Data"]
    ldt = dt - datetime.timedelta(minutes=ts)
    start = datetime.datetime(ldt[0].year, ldt[0].month, ldt[0].day, 0, 0, 0)
    start += datetime.timedelta(minutes=ts)
    end = datetime.datetime(ldt[-1].year, ldt[-1].month, ldt[-1].day, 0, 0, 0)
    end += datetime.timedelta(minutes=1440)
    Hdh = numpy.array([(d.hour + d.minute / float(60)) for d in dt])
    Month = numpy.array([d.month for d in dt])
    # get the initial start and end dates
    StartDate = str(dt[0])
    EndDate = str(dt[-1])
    # find the start index of the first whole day (time=00:30)
    si = pfp_utils.GetDateIndex(dt, StartDate, ts=ts, default=0, match="startnextday")
    # find the end index of the last whole day (time=00:00)
    ei = pfp_utils.GetDateIndex(dt, EndDate, ts=ts, default=-1, match="endpreviousday")
    # get local views of the datetime series
    Hdh = Hdh[si:ei + 1]
    Month = Month[si:ei + 1]
    # get the number of time steps in a day
    ntsInDay = int(24.0 * 60.0 / float(ts))
    # loop over the variables listed in the control file
    cf_labels = sorted(list(cf["Variables"].keys()))
    ds_labels = sorted(list(ds.root["Variables"].keys()))
    for label in cf_labels:
        # check to see if an alternative variable name is given
        label = pfp_utils.get_keyvaluefromcf(cf, ["Variables"], "name", default=label)
        if label in ds_labels:
            logger.info(" Doing climatology for " + label)
            var = pfp_utils.GetVariable(ds, label, start=si, end=ei)
            # do the diurnal by month statistics
            fmt_str = get_formatstring(cf, label, fmt_def="")
            xlSheet = xlFile.add_sheet(label)
            Av_all = do_diurnalstats(Month, Hdh, var["Data"], xlSheet,
                                     format_string=fmt_str, ts=ts)
            # do the daily statistics
            var = pfp_utils.GetVariable(ds, label)
            var = pfp_utils.PadVariable(var, start, end)
            # get the number of days in the data
            nDays = int(len(var["Data"])) // ntsInDay
            data_daily = var["Data"].reshape(nDays, ntsInDay)
            xlSheet = xlFile.add_sheet(label + "(day)")
            write_data_1columnpertimestep(xlSheet, data_daily, ts, start, fmt_str)
            data_daily_i = do_2dinterpolation(data_daily)
            # check to see if the interpolation has left some data unfilled;
            # this can happen if there is missing data at the boundaries of
            # the date x hour 2D array
            idx = numpy.where(numpy.ma.getmaskarray(data_daily_i) == True)
            # fill any missing data in the interpolated array with the monthly climatology
            month = numpy.array([dt.month - 1 for dt in var["DateTime"]])
            month_daily = month.reshape(nDays, ntsInDay)
            hour = numpy.array([int(60 / ts * (dt.hour + float(dt.minute) / float(60)))
                                for dt in var["DateTime"]])
            hour_daily = hour.reshape(nDays, ntsInDay)
            data_daily_i[idx] = Av_all[hour_daily[idx], month_daily[idx]]
            # write the interpolated data to the Excel workbook
            xlSheet = xlFile.add_sheet(label + "i(day)")
            write_data_1columnpertimestep(xlSheet, data_daily_i, ts, start, fmt_str)
        else:
            msg = " Requested variable " + label + " not in data structure"
            logger.warning(msg)
            continue
    msg = " Saving Excel file " + os.path.split(xl_filename)[1]
    logger.info(msg)
    xlFile.save(xl_filename)
    return
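The daily statistics rely on reshaping the padded series into an (nDays, ntsInDay) array, one row per day and one column per time of day. A toy check of that reshape for 30-minute data:

import numpy
ts = 30                                         # minutes per time step
ntsInDay = int(24.0 * 60.0 / float(ts))         # 48 time steps per day
data = numpy.arange(2 * ntsInDay, dtype=float)  # two whole days of data
data_daily = data.reshape(2, ntsInDay)          # row per day, column per time step
print(data_daily.shape)                         # (2, 48)
print(data_daily[1, 0])                         # 48.0, first value of day 2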
write_empty_values=True)
chk_labels = sorted(list(chk["Variables"].keys()))
for site_name in site_names:
    msg = " Processing site " + site_name
    logger.info(msg)
    site_portal_path = os.path.join(base_path, site_name, "Data", "Processed")
    file_names = sorted([f.name for f in os.scandir(site_portal_path) if f.is_file()])
    file_names = [f for f in file_names if (("L3" in f) and (".nc" in f))]
    msg = " Files: " + ",".join(file_names)
    logger.info(msg)
    for file_name in file_names:
        nc_file_uri = os.path.join(site_portal_path, file_name)
        # read the netCDF file, don't correct the time step or update the file
        ds = pfp_io.NetCDFRead(nc_file_uri, checktimestep=False, update=False)
        ldt = pfp_utils.GetVariable(ds, "DateTime")
        ts = int(float(ds.globalattributes["time_step"]))
        dt = pfp_utils.get_timestep(ds)
        index = numpy.where(dt != ts * 60)[0]
        if len(index) != 0:
            msg = str(len(index)) + " problems found with the time stamp"
            logger.warning(msg)
            msg = "The first 10 are:"
            logger.warning(msg)
            # report the time stamps either side of each of the first 10 problems
            for i in range(min([10, len(index)])):
                ii = index[i]
                msg = "  " + str(ldt["Data"][ii - 1]) + " " + str(ldt["Data"][ii]) + " " + str(ldt["Data"][ii + 1])
                logger.warning(msg)
        # read the netCDF file again, this time correct the time step but don't update the file
        ds = pfp_io.NetCDFRead(nc_file_uri, update=False)
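The gap test compares the differences between consecutive time stamps against the nominal time step; the ts * 60 comparison implies pfp_utils.get_timestep returns those differences in seconds. A standalone check with made-up values:

import numpy
ts = 30  # nominal time step, minutes
# made-up differences between consecutive time stamps, in seconds;
# one record has been dropped, leaving a 3600 s gap
dt = numpy.array([1800, 1800, 3600, 1800])
index = numpy.where(dt != ts * 60)[0]
print(index)  # [2] -> the gap sits between records 2 and 3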
base_path = cfg["Files"]["existing_access_base_path"]
remove_pattern = ["Ah", "q", "Fn_sw", "Fn_lw"]
rename_pattern = {"u_": "U_", "v_": "V_"}
# loop over sites
for site in sites:
    msg = " Processing " + str(site)
    logger.info(msg)
    access_name = site + "_ACCESS.nc"
    access_uri = os.path.join(base_path, site, "Data", "ACCESS", "downloaded", access_name)
    if not os.path.isfile(access_uri):
        msg = "ACCESS file " + access_name + " not found"
        logger.info(msg)
        continue
    ds = pfp_io.NetCDFRead(access_uri)
    # remove deprecated global attributes
    gattrs = ["nc_level", "start_date", "end_date", "xl_datemode"]
    for gattr in gattrs:
        if gattr in list(ds.globalattributes.keys()):
            ds.globalattributes.pop(gattr)
    # add required global attributes
    ds.globalattributes["processing_level"] = "L1"
    ds.globalattributes["site_name"] = site
    ds.globalattributes["latitude"] = site_info[site]["Latitude"]
    ds.globalattributes["longitude"] = site_info[site]["Longitude"]
    ds.globalattributes["altitude"] = site_info[site]["Altitude"]
    ds.globalattributes["time_zone"] = site_info[site]["Time zone"]
    ds.globalattributes["time_step"] = site_info[site]["Time step"]
    # remove unwanted variables
    labels = list(ds.series.keys())
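The fragment ends before remove_pattern and rename_pattern are applied, so the following is only a guess at that apply step (prefix matching is an assumption), with made-up variable labels:

# hypothetical apply step for the patterns above (not shown in the fragment)
remove_pattern = ["Ah", "q", "Fn_sw", "Fn_lw"]  # as above
rename_pattern = {"u_": "U_", "v_": "V_"}       # as above
labels = ["Ah_2m", "q_2m", "u_0", "v_10", "Ta_2m"]
keep = [l for l in labels if not any(l.startswith(p) for p in remove_pattern)]
renamed = []
for label in keep:
    for old, new in rename_pattern.items():
        if label.startswith(old):
            label = new + label[len(old):]
    renamed.append(label)
print(renamed)  # ['U_0', 'V_10', 'Ta_2m']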
if not os.path.isdir(sp):
    msg = sp + " not found, skipping site ..."
    logger.warning(msg)
    continue
files = sorted([f for f in os.listdir(sp) if ("L3" in f and ".nc" in f)])
if len(files) == 0:
    msg = "No files found in " + sp + ", skipping ..."
    logger.error(msg)
    continue
for fn in files:
    ifp = os.path.join(sp, fn)
    msg = "Converting " + fn
    logger.info(msg)
    std["Files"]["in_filename"] = ifp
    # read the input file
    ds1 = pfp_io.NetCDFRead(ifp, update=False)
    # update the variable names
    change_variable_names(std, ds1)
    # make sure there are Ws and Wd series
    copy_ws_wd(ds1)
    # make sure we have all the variables we want ...
    ds2 = include_variables(std, ds1)
    # ... but not the ones we don't
    exclude_variables(std, ds2)
    # update the global attributes
    change_global_attributes(std, ds2)
    # update the variable attributes
    change_variable_attributes(std, ds2)
    # Fc single point storage
    consistent_Fco2_storage(std, ds2, site)
    ofp = os.path.join(op, fn)