def AhfromRH(ds, Ah_out, RH_in, Ta_in):
    """
    Purpose:
     Calculate absolute humidity from relative humidity and air temperature
     and add it to the data structure as a new series.
     Nothing is calculated if either input series is absent from the data
     structure or if the requested output series is already present.
    Usage:
     qcfunc.AhfromRH(ds,"Ah_HMP_2m","RH_HMP_2m","Ta_HMP_2m")
    Returns:
     1 if the output series was created, 0 otherwise.
    Author: PRI
    Date: September 2015
    """
    # bail out on the first input series that is not in the data structure
    for label in (RH_in, Ta_in):
        if label in ds.series.keys():
            continue
        log.error("".join([" AhfromRH: Requested series ", label,
                           " not found, ", Ah_out, " not calculated"]))
        return 0
    # never overwrite an existing series
    if Ah_out in ds.series.keys():
        log.error("".join([" AhfromRH: Output series ", Ah_out,
                           " already exists, skipping ..."]))
        return 0
    # get the inputs as masked arrays, only the RH attributes are needed
    rh, _, rh_attr = qcutils.GetSeriesasMA(ds, RH_in)
    ta, _, _ = qcutils.GetSeriesasMA(ds, Ta_in)
    ah = mf.absolutehumidityfromRH(ta, rh)
    # the output inherits its height attribute from the RH series
    description = "Absolute humidity calculated from " + RH_in + " and " + Ta_in
    ah_attr = qcutils.MakeAttributeDictionary(long_name=description,
                                              height=rh_attr["height"],
                                              units="g/m3")
    qcutils.CreateSeries(ds, Ah_out, ah, FList=[RH_in, Ta_in], Attr=ah_attr)
    return 1
def AhfromRH(ds, Ah_out, RH_in, Ta_in):
    """
    Purpose:
     Calculate absolute humidity from relative humidity and air temperature
     and add it to the data structure as a new series.
     Nothing is calculated if either input series is absent from the data
     structure or if the requested output series is already present.
     The QC flag of the new series is 1 where the calculated absolute
     humidity is masked and 0 elsewhere.
    Usage:
     qcfunc.AhfromRH(ds,"Ah_HMP_2m","RH_HMP_2m","Ta_HMP_2m")
    Returns:
     1 if the output series was created, 0 otherwise.
    Author: PRI
    Date: September 2015
    """
    # templates used to build the QC flag
    nRecs = int(ds.globalattributes["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # bail out on the first input series that is not in the data structure
    for label in (RH_in, Ta_in):
        if label in ds.series.keys():
            continue
        logger.error("".join([" AhfromRH: Requested series ", label,
                              " not found, ", Ah_out, " not calculated"]))
        return 0
    # never overwrite an existing series
    if Ah_out in ds.series.keys():
        logger.error("".join([" AhfromRH: Output series ", Ah_out,
                              " already exists, skipping ..."]))
        return 0
    # get the inputs as masked arrays, only the RH attributes are needed
    rh, _, rh_attr = qcutils.GetSeriesasMA(ds, RH_in)
    ta, _, _ = qcutils.GetSeriesasMA(ds, Ta_in)
    ah = mf.absolutehumidityfromRH(ta, rh)
    # the output inherits its height attribute from the RH series
    description = "Absolute humidity calculated from " + RH_in + " and " + Ta_in
    ah_attr = qcutils.MakeAttributeDictionary(long_name=description,
                                              height=rh_attr["height"],
                                              units="g/m3")
    # flag is 1 wherever the result is masked
    is_masked = numpy.ma.getmaskarray(ah)
    qc_flag = numpy.where(is_masked, ones, zeros)
    qcutils.CreateSeries(ds, Ah_out, ah, qc_flag, ah_attr)
    return 1
def AhfromMR(ds, Ah_out, MR_in, Ta_in, ps_in):
    """
    Purpose:
     Calculate absolute humidity from the water vapour mixing ratio, air
     temperature and pressure and add it to the data structure as a new
     series.
     Nothing is calculated if any input series is absent from the data
     structure or if the requested output series is already present.
     The QC flag of the new series is 1 where the calculated absolute
     humidity is masked and 0 elsewhere.
    Usage:
     qcfunc.AhfromMR(ds,"Ah_IRGA_Av","H2O_IRGA_Av","Ta_HMP_2m","ps")
    Returns:
     1 if the output series was created, 0 otherwise.
    Author: PRI
    Date: September 2015
    """
    # templates used to build the QC flag
    nRecs = int(ds.globalattributes["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # bail out on the first input series that is not in the data structure
    for label in (MR_in, Ta_in, ps_in):
        if label in ds.series.keys():
            continue
        logger.error("".join([" AhfromMR: Requested series ", label,
                              " not found, ", Ah_out, " not calculated"]))
        return 0
    # never overwrite an existing series
    if Ah_out in ds.series.keys():
        logger.error("".join([" AhfromMR: Output series ", Ah_out,
                              " already exists, skipping ..."]))
        return 0
    # get the inputs as masked arrays, only the mixing ratio attributes are needed
    mr, _, mr_attr = qcutils.GetSeriesasMA(ds, MR_in)
    ta, _, _ = qcutils.GetSeriesasMA(ds, Ta_in)
    ps, _, _ = qcutils.GetSeriesasMA(ds, ps_in)
    ah = mf.h2o_gpm3frommmolpmol(mr, ta, ps)
    # the output inherits its height attribute from the mixing ratio series
    long_name = "Absolute humidity calculated from " + MR_in + ", " + Ta_in + " and " + ps_in
    ah_attr = qcutils.MakeAttributeDictionary(long_name=long_name,
                                              height=mr_attr["height"],
                                              units="g/m3")
    # flag is 1 wherever the result is masked
    is_masked = numpy.ma.getmaskarray(ah)
    qc_flag = numpy.where(is_masked, ones, zeros)
    qcutils.CreateSeries(ds, Ah_out, ah, qc_flag, ah_attr)
    return 1
def do_dependencycheck(cf, ds, section='', series='', code=23, mode="quiet"):
    """
    Purpose:
     Mask a dependent series wherever any of its precursor series is masked.
     The precursor series are listed under the "Source" keyword of the
     "DependencyCheck" entry for the series in the control file.  Wherever a
     precursor is masked, the dependent data are masked and the dependent QC
     flag is set to code.  The series is then written back to the data
     structure with a "DependencyCheck_source" attribute listing the
     precursors, and "do_dependencychecks" is appended to the "Functions"
     global attribute if it is not already there.
    Usage:
     do_dependencycheck(cf, ds, section="Variables", series="Fc")
     where cf is a control file object
           ds is a data structure
           section is the control file section holding the series, looked up
                   with qcutils.get_cfsection when it is empty
           series is the label of the dependent series
           code is the QC flag value written where a precursor is masked
           mode is "verbose" to log an info message for each series checked
    Returns:
     None.  The function returns without changing ds when both section and
     series are empty, when the section can not be found, when the series has
     no "DependencyCheck" entry or when that entry has no "Source" keyword.
    """
    if len(section) == 0 and len(series) == 0:
        return
    if len(section) == 0:
        section = qcutils.get_cfsection(cf, series=series, mode='quiet')
        if len(section) == 0:
            # series is not in any control file section, nothing to check
            msg = " DependencyCheck: series "+series+" not found in control file, skipping ..."
            log.error(msg)
            return
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series "+series+", skipping ..."
        log.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for "+series
        log.info(msg)
    # get the precursor source list from the control file
    source_list = ast.literal_eval(cf[section][series]["DependencyCheck"]["Source"])
    # get the data
    dependent_data, dependent_flag, dependent_attr = qcutils.GetSeriesasMA(ds, series)
    # loop over the precursor source list
    for item in source_list:
        # check the precursor is in the data structure
        if item not in ds.series.keys():
            msg = " DependencyCheck: "+series+" precursor series "+item+" not found, skipping ..."
            # tell the user, previously this message was built but never logged
            log.error(msg)
            continue
        # get the precursor data, only its mask is used
        precursor_data, _, _ = qcutils.GetSeriesasMA(ds, item)
        precursor_mask = numpy.ma.getmaskarray(precursor_data)
        # mask the dependent data where the precursor is masked
        dependent_data = numpy.ma.masked_where(precursor_mask, dependent_data)
        # set the dependent QC flag at the same points
        index = numpy.where(precursor_mask)[0]
        dependent_flag[index] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    qcutils.CreateSeries(ds, series, dependent_data, Flag=dependent_flag, Attr=dependent_attr)
    if 'do_dependencychecks' not in ds.globalattributes['Functions']:
        ds.globalattributes['Functions'] = ds.globalattributes['Functions']+',do_dependencychecks'
def get_instantaneous_precip60(ds_60minutes):
    """
    Purpose:
     Convert the accumulated precipitation series Precip_00 to Precip_22 in
     the data structure to the precipitation total over each time step.
     Each series is replaced in place and its units attribute is set to
     "mm/60 minutes".
    Usage:
     get_instantaneous_precip60(ds_60minutes)
     where ds_60minutes is a data structure containing the series Hr_UTC
           and Precip_ij for i and j in 0, 1, 2
    """
    hr_utc, _, _ = qcutils.GetSeries(ds_60minutes, 'Hr_UTC')
    # labels of the precipitation series, one per (i,j) pair
    grid_labels = ["Precip_"+str(i)+str(j) for i in range(3) for j in range(3)]
    for label in grid_labels:
        # get the accumulated precipitation
        accum, flag, attr = qcutils.GetSeries(ds_60minutes, label)
        # difference successive records to get the total per time step
        precip = numpy.ediff1d(accum, to_begin=0)
        # the accumulation is reset at 00, 06, 12 and 18 UTC so at these
        # hours the time step total is taken to be the accumulated value
        reset_idx = numpy.where(numpy.mod(hr_utc, 6) == 0)[0]
        precip[reset_idx] = accum[reset_idx]
        # totals below 0.01 (this includes negative differences) are set to 0
        small_idx = numpy.ma.where(precip < 0.01)[0]
        precip[small_idx] = float(0)
        # total is missing wherever the accumulated value was flagged
        bad_idx = numpy.where(flag != 0)[0]
        precip[bad_idx] = float(c.missing_value)
        # set some variable attributes
        attr["long_name"] = "Precipitation total over time step"
        attr["units"] = "mm/60 minutes"
        qcutils.CreateSeries(ds_60minutes, label, precip, Flag=flag, Attr=attr)
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Build the dictionary of information used when estimating ecosystem
     respiration with the Lloyd-Taylor method, and register the series in
     the "standard" merge dictionary of the data structure.
     Empty series are created in ds for the output series and for the merged
     series if they do not exist yet.
    Usage:
     rpLT_info = rpLT_createdict(cf, ds, series)
     where cf is a control file object
           ds is a data structure
           series is the label of the series in the control file
    Returns:
     The information dictionary, or None if the series is not in the control
     file or if any driver contains missing data.
    Author: PRI
    Date October 2015
    """
    # find which section of the control file holds the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # nothing to do for a series that is not in the control file
        logger.error("ERUsingLloydTaylor: Series " + series +
                     " not found in control file, skipping ...")
        return
    lt_options = cf[section][series]["ERUsingLloydTaylor"]
    driver_list = ast.literal_eval(lt_options["drivers"])
    target = lt_options["target"]
    # the drivers must be gap free
    for label in driver_list:
        driver_data, _, _ = qcutils.GetSeriesasMA(ds, label)
        if numpy.ma.count_masked(driver_data) != 0:
            logger.error("ERUsingLloydTaylor: driver " + label +
                         " contains missing data, skipping target " + target)
            return
    # site name, source, target, drivers and output series for this entry
    rpLT_info = {"site_name": ds.globalattributes["site_name"]}
    rpLT_info["source"] = qcutils.get_keyvaluefromcf(
        cf, [section, series, "ERUsingLloydTaylor"], "source", default="Fc")
    rpLT_info["target"] = target
    rpLT_info["drivers"] = driver_list
    output_label = lt_options["output"]
    rpLT_info["output"] = output_label
    # one empty list per fit statistic, filled later for plotting
    result_keys = ("startdate", "enddate", "No. points", "r", "Bias", "RMSE",
                   "Frac Bias", "NMSE", "Avg (obs)", "Avg (LT)", "Var (obs)",
                   "Var (LT)", "Var ratio", "m_ols", "b_ols")
    rpLT_info["results"] = {key: [] for key in result_keys}
    # create the configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # make sure the output series exists in ds
    if output_label not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, output_label)
        qcutils.CreateSeries(ds, output_label, data, flag, attr)
    # make sure the merge dictionary and its "standard" entry exist
    if "merge" not in dir(ds):
        ds.merge = {}
    if "standard" not in ds.merge.keys():
        ds.merge["standard"] = {}
    # the merge entry for this series, output is the series itself
    merge_entry = {"output": series}
    ds.merge["standard"][series] = merge_entry
    merge_entry["source"] = ast.literal_eval(
        cf[section][series]["MergeSeries"]["Source"])
    # make sure the merged series exists in ds
    if series not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, series)
        qcutils.CreateSeries(ds, series, data, flag, attr)
    return rpLT_info
def interpolate_to_30minutes(ds_60minutes):
    """
    Purpose:
     Create a new data structure with a 30 minute time step by linearly
     interpolating every data series of a 60 minute data structure.
     Interpolated values that are influenced by a missing 60 minute value
     are set to missing and given a QC flag of 1, all others get a flag of 0.
     The date and time series are regenerated rather than interpolated and
     Hr_UTC is recalculated from the 30 minute UTC datetimes.
    Usage:
     ds_30minutes = interpolate_to_30minutes(ds_60minutes)
     where ds_60minutes is a data structure containing the series DateTime
           and DateTime_UTC
    Returns:
     The new 30 minute data structure, ds_60minutes is not modified.
    """
    ds_30minutes = qcio.DataStructure()
    # copy the global attributes
    for this_attr in ds_60minutes.globalattributes.keys():
        ds_30minutes.globalattributes[this_attr] = ds_60minutes.globalattributes[this_attr]
    # update the global attribute "time_step"
    ds_30minutes.globalattributes["time_step"] = 30
    # generate the 30 minute datetime series
    half_hour = datetime.timedelta(minutes=30)
    dt_loc_60minutes = ds_60minutes.series["DateTime"]["Data"]
    dt_loc_30minutes = list(perdelta(dt_loc_60minutes[0], dt_loc_60minutes[-1], half_hour))
    nRecs_30minutes = len(dt_loc_30minutes)
    dt_utc_60minutes = ds_60minutes.series["DateTime_UTC"]["Data"]
    dt_utc_30minutes = list(perdelta(dt_utc_60minutes[0], dt_utc_60minutes[-1], half_hour))
    # update the global attribute "nc_nrecs"
    ds_30minutes.globalattributes['nc_nrecs'] = nRecs_30minutes
    ds_30minutes.series["DateTime"] = {}
    ds_30minutes.series["DateTime"]["Data"] = dt_loc_30minutes
    ds_30minutes.series["DateTime"]["Flag"] = numpy.zeros(len(dt_loc_30minutes), dtype=numpy.int32)
    ds_30minutes.series["DateTime_UTC"] = {}
    ds_30minutes.series["DateTime_UTC"]["Data"] = dt_utc_30minutes
    ds_30minutes.series["DateTime_UTC"]["Flag"] = numpy.zeros(len(dt_utc_30minutes), dtype=numpy.int32)
    # get the year, month etc from the datetime
    qcutils.get_xldatefromdatetime(ds_30minutes)
    qcutils.get_ymdhmsfromdatetime(ds_30minutes)
    # interpolate to 30 minutes
    nRecs_60 = len(ds_60minutes.series["DateTime"]["Data"])
    nRecs_30 = len(ds_30minutes.series["DateTime"]["Data"])
    x_60minutes = numpy.arange(0, nRecs_60, 1)
    # NOTE(review): x_30minutes has 2*nRecs_60-1 points, this assumes perdelta
    # yields the same number of datetimes (i.e. nRecs_30) - confirm
    x_30minutes = numpy.arange(0, nRecs_60 - 0.5, 0.5)
    # strip out the date and time variables already done
    # a new list is built because dictionary views in Python 3 have no
    # remove() method, the order of the remaining labels is unchanged
    already_done = ["DateTime", "DateTime_UTC", "xlDateTime", "Year", "Month",
                    "Day", "Hour", "Minute", "Second", "Hdh", "Hr_UTC"]
    varlist_60 = [label for label in ds_60minutes.series.keys()
                  if label not in already_done]
    # now do the interpolation (its OK to interpolate accumulated precipitation)
    for label in varlist_60:
        series_60minutes, flag, attr = qcutils.GetSeries(ds_60minutes, label)
        # indicator series, 1 where the 60 minute value is missing
        ci_60minutes = numpy.zeros(len(series_60minutes))
        idx = numpy.where(abs(series_60minutes - float(c.missing_value)) < c.eps)[0]
        ci_60minutes[idx] = float(1)
        int_fn = interp1d(x_60minutes, series_60minutes)
        series_30minutes = int_fn(x_30minutes)
        # the interpolated indicator is non-zero wherever a missing 60 minute
        # value contributed to the interpolated 30 minute value
        int_fn = interp1d(x_60minutes, ci_60minutes)
        ci_30minutes = int_fn(x_30minutes)
        idx = numpy.where(abs(ci_30minutes - float(0)) > c.eps)[0]
        series_30minutes[idx] = numpy.float64(c.missing_value)
        flag_30minutes = numpy.zeros(nRecs_30, dtype=numpy.int32)
        flag_30minutes[idx] = numpy.int32(1)
        qcutils.CreateSeries(ds_30minutes, label, series_30minutes,
                             Flag=flag_30minutes, Attr=attr)
    # get the UTC hour as a decimal hour
    hr_utc = [float(x.hour) + float(x.minute) / 60 for x in dt_utc_30minutes]
    attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    flag_30minutes = numpy.zeros(nRecs_30, dtype=numpy.int32)
    qcutils.CreateSeries(ds_30minutes, 'Hr_UTC', hr_utc, Flag=flag_30minutes, Attr=attr)
    return ds_30minutes
ds_aws_60minute.series["DateTime"][ "Attr"] = qcutils.MakeAttributeDictionary( long_name="DateTime in local time zone", units="None") # add the Excel datetime, year, month etc qcutils.get_xldatefromdatetime(ds_aws_60minute) qcutils.get_ymdhmsfromdatetime(ds_aws_60minute) # loop over the series and take the average (every thing but Precip) or sum (Precip) for item in series_list: if "Precip" in item: data_30minute, flag_30minute, attr = qcutils.GetSeriesasMA( ds_aws_30minute, item, si=si_wholehour, ei=ei_wholehour) data_2d = numpy.reshape(data_30minute, (nRecs_30minute / 2, 2)) flag_2d = numpy.reshape(flag_30minute, (nRecs_30minute / 2, 2)) data_60minute = numpy.ma.sum(data_2d, axis=1) flag_60minute = numpy.ma.max(flag_2d, axis=1) qcutils.CreateSeries(ds_aws_60minute, item, data_60minute, flag_60minute, attr) elif "Wd" in item: Ws_30minute, flag_30minute, attr = qcutils.GetSeriesasMA( ds_aws_30minute, item, si=si_wholehour, ei=ei_wholehour) Wd_30minute, flag_30minute, attr = qcutils.GetSeriesasMA( ds_aws_30minute, item, si=si_wholehour, ei=ei_wholehour) U_30minute, V_30minute = qcutils.convert_WsWdtoUV( Ws_30minute, Wd_30minute) U_2d = numpy.reshape(U_30minute, (nRecs_30minute / 2, 2)) V_2d = numpy.reshape(V_30minute, (nRecs_30minute / 2, 2)) flag_2d = numpy.reshape(flag_30minute, (nRecs_30minute / 2, 2)) U_60minute = numpy.ma.sum(U_2d, axis=1) V_60minute = numpy.ma.sum(V_2d, axis=1) Ws_60minute, Wd_60minute = qcutils.convert_UVtoWsWd( U_60minute, V_60minute) flag_60minute = numpy.ma.max(flag_2d, axis=1)
# now pull the data out and put it in separate data structures, one per station, all # of which are held in a data structure dictionary ds_dict = {} for bom_id in data_dict.keys(): log.info("Processing BoM station: "+str(bom_id)) # create a data structure ds=qcio.DataStructure() # put the year, month, day, hour and minute into the data structure nRecs = data_dict[bom_id].shape[0] ds.globalattributes["nc_nrecs"] = nRecs ds.globalattributes["time_step"] = 30 ds.globalattributes["latitude"] = bom_sites_info[site_name][str(bom_id)]["latitude"] ds.globalattributes["longitude"] = bom_sites_info[site_name][str(bom_id)]["longitude"] flag = numpy.zeros(nRecs,dtype=numpy.int32) Seconds = numpy.zeros(nRecs,dtype=numpy.float64) qcutils.CreateSeries(ds,'Year',data_dict[bom_id][:,1],Flag=flag,Attr=qcutils.MakeAttributeDictionary(long_name='Year',units='none')) qcutils.CreateSeries(ds,'Month',data_dict[bom_id][:,2],Flag=flag,Attr=qcutils.MakeAttributeDictionary(long_name='Month',units='none')) qcutils.CreateSeries(ds,'Day',data_dict[bom_id][:,3],Flag=flag,Attr=qcutils.MakeAttributeDictionary(long_name='Day',units='none')) qcutils.CreateSeries(ds,'Hour',data_dict[bom_id][:,4],Flag=flag,Attr=qcutils.MakeAttributeDictionary(long_name='Hour',units='none')) qcutils.CreateSeries(ds,'Minute',data_dict[bom_id][:,5],Flag=flag,Attr=qcutils.MakeAttributeDictionary(long_name='Minute',units='none')) qcutils.CreateSeries(ds,'Second',Seconds,Flag=flag,Attr=qcutils.MakeAttributeDictionary(long_name='Second',units='none')) # now get the Python datetime qcutils.get_datetimefromymdhms(ds) # now put the data into the data structure attr=qcutils.MakeAttributeDictionary(long_name='Precipitation since 0900',units='mm', bom_id=str(bom_id),bom_name=bom_sites_info[site_name][str(bom_id)]["site_name"], bom_dist=bom_sites_info[site_name][str(bom_id)]["distance"]) qcutils.CreateSeries(ds,'Precip',data_dict[bom_id][:,6],Flag=flag,Attr=attr) attr=qcutils.MakeAttributeDictionary(long_name='Air 
temperature',units='C', bom_id=str(bom_id),bom_name=bom_sites_info[site_name][str(bom_id)]["site_name"], bom_dist=bom_sites_info[site_name][str(bom_id)]["distance"])
def gfClimatology_interpolateddaily(ds, series, output, xlbooks):
    """
    Purpose:
     Gap fill a series using climatological values read from a worksheet in
     which the rows are days and the columns are times of day.
     The worksheet is named series+'i(day)' and is read from the workbook
     given by ds.climatology[output]["file_name"].  Each missing value in
     the series is replaced by the climatological value whose datetime is
     found by qcutils.find_nearest_value and is given a QC flag of 40.  If
     the look up raises ValueError the value stays missing and the QC flag
     is set to 41.  The result is written to ds as the series output.
    Usage:
     gfClimatology_interpolateddaily(ds, series, output, xlbooks)
     where ds is a data structure
           series is the label of the series to be gap filled
           output is the label of the gap filled series
           xlbooks is a dictionary of open workbooks keyed by file name
    """
    # find the worksheet holding the interpolated climatology
    xlfilename = ds.climatology[output]["file_name"]
    sheet_name = series + 'i(day)'
    workbook = xlbooks[xlfilename]
    if sheet_name not in workbook.sheet_names():
        logger.warning(" gfClimatology: sheet " + sheet_name + " not found, skipping ...")
        return
    ldt = ds.series["DateTime"]["Data"]
    thissheet = workbook.sheet_by_name(sheet_name)
    datemode = workbook.datemode
    basedate = datetime.datetime(1899, 12, 30)
    # first column holds the dates, the data rows start at row index 2
    nts = thissheet.ncols - 1
    ndays = thissheet.nrows - 2
    # time stamps (hours) are read from row index 1
    tsteps = thissheet.row_values(1, start_colx=1, end_colx=nts + 1)
    # flattened climatological values and their datetimes
    val1d = numpy.ma.zeros(ndays * nts, dtype=numpy.float64)
    cdt = [None] * nts * ndays
    for day, sheet_row in enumerate(range(2, ndays + 2)):
        lo = day * nts
        hi = lo + nts
        # Excel date number of this day converted to a Python datetime
        xldatenumber = int(thissheet.cell_value(sheet_row, 0))
        day_start = basedate + datetime.timedelta(days=xldatenumber + 1462 * datemode)
        cdt[lo:hi] = [day_start + datetime.timedelta(hours=hh) for hh in tsteps]
        val1d[lo:hi] = thissheet.row_values(sheet_row, start_colx=1, end_colx=nts + 1)
    # get the data to be filled and the indices of the missing values
    data, flag, attr = qcutils.GetSeriesasMA(ds, series)
    gap_index = numpy.where(numpy.ma.getmaskarray(data))[0]
    # fill the gaps one at a time by matching the datetime of each missing
    # value to the datetimes of the climatology, this may not be the fastest
    # way but setting a slice of data from val1d using the index left the
    # mask set on the replaced values
    for ii in gap_index:
        try:
            jj = qcutils.find_nearest_value(cdt, ldt[ii])
            data[ii] = val1d[jj]
            flag[ii] = numpy.int32(40)
        except ValueError:
            data[ii] = numpy.float64(c.missing_value)
            flag[ii] = numpy.int32(41)
    # put the gap filled data back into the data structure
    qcutils.CreateSeries(ds, output, data, flag, attr)
def get_accessdata(cf, ds_60minutes, f, info):
    """
    Purpose:
     Read the variables listed in the control file from an open ACCESS file
     and put them into the 60 minute data structure, one series per cell of
     the 3x3 grid supplied for each variable.  The series are labelled
     label_ij with i and j in 0, 1, 2.
     Records whose time stamp is the same as that of the following record
     are dropped.  The datetimes are stored as UTC (DateTime_UTC) and as
     local time with any daylight saving offset removed (DateTime).
     A variable that is not in the ACCESS file, or that does not have 3 or 4
     dimensions, is reported and skipped.
    Usage:
     get_accessdata(cf, ds_60minutes, f, info)
     where cf is a control file object with a "Variables" section
           ds_60minutes is the data structure to be filled
           f is the open ACCESS file, read via f.variables
           info is a dictionary, info["site_tz"] is the site time zone
    """
    # latitude and longitude, chose central pixel of 3x3 grid
    ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
    ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
    # list of variables to process
    var_list = list(cf["Variables"].keys())
    # get a series of Python datetimes and put this into the data structure
    valid_date = f.variables["valid_date"][:]
    nRecs = len(valid_date)
    valid_time = f.variables["valid_time"][:]
    dl = [datetime.datetime.strptime(str(int(valid_date[i])*10000+int(valid_time[i])), "%Y%m%d%H%M")
          for i in range(0, nRecs)]
    dt_utc_all = numpy.array(dl)
    # time from each record to the next, the last record is given 3600 s
    time_step = numpy.array([(dt_utc_all[i]-dt_utc_all[i-1]).total_seconds()
                             for i in range(1, len(dt_utc_all))])
    time_step = numpy.append(time_step, 3600)
    # keep only the records that are not followed by a duplicate time stamp
    idxne0 = numpy.where(time_step != 0)[0]
    dt_utc = dt_utc_all[idxne0]
    dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_utc]
    # local time with the daylight saving offset removed and no tzinfo
    dt_loc = [x.astimezone(info["site_tz"]) for x in dt_utc]
    dt_loc = [x-x.dst() for x in dt_loc]
    dt_loc = [x.replace(tzinfo=None) for x in dt_loc]
    flag = numpy.zeros(len(dt_loc), dtype=numpy.int32)
    ds_60minutes.series["DateTime"] = {}
    ds_60minutes.series["DateTime"]["Data"] = dt_loc
    ds_60minutes.series["DateTime"]["Flag"] = flag
    ds_60minutes.series["DateTime_UTC"] = {}
    ds_60minutes.series["DateTime_UTC"]["Data"] = dt_utc
    ds_60minutes.series["DateTime_UTC"]["Flag"] = flag
    nRecs = len(ds_60minutes.series["DateTime"]["Data"])
    ds_60minutes.globalattributes["nc_nrecs"] = nRecs
    # we're done with valid_date and valid_time, drop them from the variable list
    for item in ["valid_date", "valid_time", "lat", "lon"]:
        if item in var_list:
            var_list.remove(item)
    # create the QC flag with all zeros
    flag_60minutes = numpy.zeros(nRecs, dtype=numpy.int32)
    # get the UTC hour
    hr_utc = [x.hour for x in dt_utc]
    attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    qcutils.CreateSeries(ds_60minutes, 'Hr_UTC', hr_utc, Flag=flag_60minutes, Attr=attr)
    # now loop over the variables listed in the control file
    for label in var_list:
        # get the name of the variable in the ACCESS file
        access_name = qcutils.get_keyvaluefromcf(cf, ["Variables", label], "access_name", default=label)
        # warn the user if the variable not found
        if access_name not in list(f.variables.keys()):
            msg = "Requested variable "+access_name
            msg = msg+" not found in ACCESS data"
            logging.error(msg)
            continue
        # get the variable attributes
        attr = get_variableattributes(f, access_name)
        # the number of dimensions is the same for every cell of the grid so
        # check it once and skip the variable if it is not recognised, the
        # previous code logged the error and then went on to use a series
        # that was either undefined or left over from the previous variable
        access_variable = f.variables[access_name]
        ndims = len(access_variable.shape)
        if ndims not in (3, 4):
            msg = "Unrecognised variable ("+label
            msg = msg+") dimension in ACCESS file"
            logging.error(msg)
            continue
        # loop over the 3x3 matrix of ACCESS grid data supplied
        for i in range(0, 3):
            for j in range(0, 3):
                label_ij = label+'_'+str(i)+str(j)
                if ndims == 3:
                    series = access_variable[:, i, j]
                else:
                    # 4 dimensions, take index 0 of the second dimension
                    series = access_variable[:, 0, i, j]
                # drop the records with duplicated time stamps
                series = series[idxne0]
                qcutils.CreateSeries(ds_60minutes, label_ij, series,
                                     Flag=flag_60minutes, Attr=attr)
    return
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD file (gz or uncompressed) and returns the data in a data
     structure.
     The fixed width records are decoded into the series Wd, Ws, Ta, Td, ps
     and Precip.  Fields that can not be decoded or that hold the missing
     data value are masked.  Pressure is converted from hPa to kPa and from
     sea level to station pressure using the site altitude and the air
     temperature.  Precipitation outside the range 0 to 100 is masked.
     Relative, absolute and specific humidity are calculated from the dew
     point temperature.  QC flags are 1 where the data are masked and 0
     elsewhere.
    Assumptions:
     The site latitude, longitude and altitude are read from the first line
     of the file.
     The last line of the file is not decoded.
    Usage:
     ds = read_isd_file(isd_file_path)
    Returns:
     A data structure containing the decoded series, with the datetimes
     localised to UTC.
    Author: PRI
    Date: June 2017
    """

    def decode_field(text, scale, missing):
        # return the field as a float divided by scale, or the missing data
        # value when the field is not a number
        try:
            return float(text) / scale
        except ValueError:
            return missing

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    # the site ID is the first two hyphen separated parts of the file name
    name_parts = isd_file_name.split("-")
    isd_site_id = name_parts[0] + "-" + name_parts[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            # gzip returns bytes, decode them so the fields can be compared
            # with strings under Python 3, "replace" swaps each undecodable
            # byte for one character so the column positions are preserved
            content = [line.decode("ascii", "replace") for line in fp.readlines()]
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34]) / float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41]) / float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    series_definitions = [("DateTime", "Datetime", "none"),
                          ("Wd", "Wind direction", "degrees"),
                          ("Ws", "Wind speed", "m/s"),
                          ("Ta", "Air temperature", "C"),
                          ("Td", "Dew point temperature", "C"),
                          ("ps", "Surface pressure", "kPa"),
                          ("Precip", "Precipitation", "mm")]
    for label, long_name, units in series_definitions:
        ds.series[label] = {"Data": [], "Flag": [],
                            "Attr": {"long_name": long_name, "units": units}}
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"]
    # iterate over the lines in the file (except the last) and decode the data
    for line in content[:-1]:
        # filter out anything other than the accepted observation types
        if line[41:46] not in OK_obs_code:
            continue
        YY = int(line[15:19])
        MM = int(line[19:21])
        DD = int(line[21:23])
        HH = int(line[23:25])
        mm = int(line[25:27])
        dt = datetime.datetime(YY, MM, DD, HH, mm, 0)
        ds.series["DateTime"]["Data"].append(pytz.utc.localize(dt))
        # wind direction, degT
        ds.series["Wd"]["Data"].append(decode_field(line[60:63], float(1), float(999)))
        # wind speed, m/s
        ds.series["Ws"]["Data"].append(decode_field(line[65:69], float(10), float(999.9)))
        # air temperature, C
        ds.series["Ta"]["Data"].append(decode_field(line[87:92], float(10), float(999.9)))
        # dew point temperature, C
        ds.series["Td"]["Data"].append(decode_field(line[93:98], float(10), float(999.9)))
        # sea level pressure, hPa
        ds.series["ps"]["Data"].append(decode_field(line[99:104], float(10), float(9999.9)))
        # precipitation, mm, only decoded when the AA1 identifier is present
        if line[108:111] == "AA1":
            ds.series["Precip"]["Data"].append(decode_field(line[113:117], float(10), float(999.9)))
        else:
            ds.series["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    ds.series["DateTime"]["Attr"]["time_zone"] = "UTC"
    # QC flag templates, int32 as for the flags of the other series
    nRecs = len(ds.series["DateTime"]["Data"])
    f0 = numpy.zeros(nRecs, dtype=numpy.int32)
    f1 = numpy.ones(nRecs, dtype=numpy.int32)
    ds.series["DateTime"]["Data"] = numpy.array(ds.series["DateTime"]["Data"])
    ds.series["DateTime"]["Flag"] = f0
    ds.globalattributes["nc_nrecs"] = len(f0)
    # the time step is the most common interval between records divided by 60
    dt_delta = qcutils.get_timestep(ds)
    # recent versions of scipy return a scalar here, older ones an array
    ts = numpy.atleast_1d(scipy.stats.mode(dt_delta)[0]) / 60
    ds.globalattributes["time_step"] = ts[0]
    # convert from lists to masked arrays, masking the missing data value
    for label, missing in [("Wd", 999), ("Ws", 999.9), ("Ta", 999.9), ("Td", 999.9)]:
        ds.series[label]["Data"] = numpy.ma.masked_equal(ds.series[label]["Data"], missing)
        ds.series[label]["Flag"] = numpy.where(
            numpy.ma.getmaskarray(ds.series[label]["Data"]), f1, f0)
    # hPa to kPa
    ds.series["ps"]["Data"] = numpy.ma.masked_equal(ds.series["ps"]["Data"], 9999.9) / float(10)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    cfac = numpy.ma.exp((-1 * site_altitude) / ((ds.series["Ta"]["Data"] + 273.15) * 29.263))
    ds.series["ps"]["Data"] = ds.series["ps"]["Data"] * cfac
    # the flag is set after the conversion because the station pressure is
    # also masked wherever the air temperature is masked
    ds.series["ps"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["ps"]["Data"]), f1, f0)
    # do precipitation and apply crude limits
    ds.series["Precip"]["Data"] = numpy.ma.masked_equal(ds.series["Precip"]["Data"], 999.9)
    condition = (ds.series["Precip"]["Data"] < 0) | (ds.series["Precip"]["Data"] > 100)
    ds.series["Precip"]["Data"] = numpy.ma.masked_where(condition, ds.series["Precip"]["Data"])
    ds.series["Precip"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Precip"]["Data"]), f1, f0)
    # get the humidities from Td
    Ta, flag, attr = qcutils.GetSeriesasMA(ds, "Ta")
    Td, flag, attr = qcutils.GetSeriesasMA(ds, "Td")
    ps, flag, attr = qcutils.GetSeriesasMA(ds, "ps")
    RH = mf.RHfromdewpoint(Td, Ta)
    flag = numpy.where(numpy.ma.getmaskarray(RH), f1, f0)
    attr = {"long_name": "Relative humidity", "units": "%"}
    qcutils.CreateSeries(ds, "RH", RH, Flag=flag, Attr=attr)
    Ah = mf.absolutehumidityfromRH(Ta, RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah), f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m3"}
    qcutils.CreateSeries(ds, "Ah", Ah, Flag=flag, Attr=attr)
    q = mf.specifichumidityfromRH(RH, Ta, ps)
    flag = numpy.where(numpy.ma.getmaskarray(q), f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    qcutils.CreateSeries(ds, "q", q, Flag=flag, Attr=attr)
    # return the data
    return ds
def gfalternate_matchstartendtimes(ds, ds_alternate):
    """
    Purpose:
     Put the alternate data onto the time axis of the tower data.
     On a normal return the alternate data structure shares the tower
     "DateTime" series and every other alternate series has one value per
     tower record:
      - if the first alternate time stamp is later than the last tower time
        stamp, every alternate series is replaced by a dummy series of
        c.missing_value with a flag of 1
      - otherwise the alternate data and flags are copied to the tower
        records whose time stamps match; all other records hold
        c.missing_value with a flag of 1 (this truncates or pads the
        alternate series as required)
    Usage:
     gfalternate_matchstartendtimes(ds,ds_alternate)
     where ds is the data structure containing the tower data
           ds_alternate is the data structure containing the alternate data
    Side effects:
     ds_alternate is modified in place (series and "nc_nrecs").
     ds.returncodes["GapFillFromAlternate"] is set to "error" if the time
     steps differ and to "normal" otherwise.
     sys.exit() is called if the matched indices do not point to equal times.
    Author: PRI
    Date: July 2015
    """
    # series holding time stamps rather than data are never padded or truncated
    time_labels = ["DateTime", "DateTime_UTC"]
    # the two data sets can only be matched if they share a time step
    tower_step = int(ds.globalattributes["time_step"])
    alternate_step = int(ds_alternate.globalattributes["time_step"])
    if tower_step != alternate_step:
        logger.error(" GapFillFromAlternate: time step for tower and alternate data are different, returning ...")
        ds.returncodes["GapFillFromAlternate"] = "error"
        return
    # first alternate time stamp and last tower time stamp decide the overlap
    alternate_times = ds_alternate.series["DateTime"]["Data"]
    first_alternate = alternate_times[0]
    tower_times = ds.series["DateTime"]["Data"]
    last_tower = tower_times[-1]
    if not (first_alternate <= last_tower):
        # the alternate data starts after the tower data ends: dummy series only
        n_tower = len(tower_times)
        ds_alternate.globalattributes["nc_nrecs"] = n_tower
        ds_alternate.series["DateTime"] = ds.series["DateTime"]
        labels = [key for key in ds_alternate.series.keys() if "_QCFlag" not in key]
        for label in labels:
            if label in time_labels:
                continue
            # only the attributes of the existing series are kept
            _, _, attr = qcutils.GetSeriesasMA(ds_alternate, label)
            dummy_data = numpy.full(n_tower, c.missing_value, dtype=numpy.float64)
            dummy_flag = numpy.ones(n_tower, dtype=numpy.int32)
            qcutils.CreateSeries(ds_alternate, label, dummy_data, dummy_flag, attr)
    else:
        # positions of the time stamps common to both data sets
        tower_index, alternate_index = qcutils.FindMatchingIndices(
            tower_times, alternate_times)
        # sanity check: both index lists must select the same times
        matched_alternate = [alternate_times[i] for i in alternate_index]
        matched_tower = [tower_times[i] for i in tower_index]
        if matched_alternate != matched_tower:
            logger.error(" Something went badly wrong and I'm giving up")
            sys.exit()
        # the series list is taken before the DateTime series is replaced
        labels = [key for key in ds_alternate.series.keys() if "_QCFlag" not in key]
        n_tower = len(tower_times)
        # the alternate data now uses the tower time axis
        ds_alternate.series["DateTime"] = ds.series["DateTime"]
        for label in labels:
            if label in time_labels:
                continue
            data, flag, attr = qcutils.GetSeriesasMA(ds_alternate, label)
            # start from "all missing" and fill in the records that match
            matched_data = numpy.full(n_tower, c.missing_value, dtype=numpy.float64)
            matched_flag = numpy.ones(n_tower, dtype=numpy.int32)
            matched_data[tower_index] = data[alternate_index]
            matched_flag[tower_index] = flag[alternate_index]
            qcutils.CreateSeries(ds_alternate, label, matched_data, matched_flag, attr)
        # the alternate data now has as many records as the tower data
        ds_alternate.globalattributes["nc_nrecs"] = n_tower
    ds.returncodes["GapFillFromAlternate"] = "normal"
savgol_window = qcutils.get_keyvaluefromcf(cf, ["EVI"], "savgol_window", default=10001) savgol_order = qcutils.get_keyvaluefromcf(cf, ["EVI"], "savgol_order", default=4) evi_interp_smooth = scipy.signal.savgol_filter( evi_interp, savgol_window, savgol_order) elif interp_type.lower() == "smooth_interp": # smoothed spline interpolation log.info(" Using smoothed spline interpolation") smooth_factor = qcutils.get_keyvaluefromcf(cf, ["EVI"], "smooth_factor", default=0.03) tck = scipy.interpolate.splrep(x_org, y_org, s=smooth_factor) evi_interp_smooth = scipy.interpolate.splev(modis_time_interp, tck, der=0) # now put data into a data structure ds.series["DateTime"] = {} ds.series["DateTime"]["Data"] = modis_dt_interp ds.series["DateTime"]["Flag"] = numpy.zeros(len(modis_dt_interp), dtype=numpy.int32) ds.series["DateTime"]["Attr"] = {} ds.series["DateTime"]["Attr"]["long_name"] = "Datetime in local timezone" ds.series["DateTime"]["Attr"]["units"] = "None" qcutils.get_ymdhmsfromdatetime(ds) qcutils.CreateSeries(ds, "evi", evi_interp_smooth, evi_flag, evi_attr) nc_file = qcio.nc_open_write(outfilename) qcio.nc_write_series(nc_file, ds)
def gfalternate_createdict(cf, ds, series, ds_alt):
    """
    Purpose:
     Creates a dictionary in ds to hold information about the alternate data
     used to gap fill the tower data.  One entry is added to ds.alternate for
     every output listed in the "GapFillFromAlternate" section of the series
     in the control file.
    Usage:
     gfalternate_createdict(cf, ds, series, ds_alt)
     where cf is the control file object
           ds is the data structure containing the tower data
           series is the label of the tower series to be gap filled
           ds_alt is a dictionary of alternate data structures keyed by
                  alternate file name
    Side effects:
     - ds.alternate is created if it does not exist and an entry is added
       (or replaced) for each output
     - an alternate file that is not yet in ds_alt is read, matched to the
       tower times with gfalternate_matchstartendtimes and stored in ds_alt
     - empty series are created in ds for an output that does not exist yet
       and for <series>_composite
     - nothing is done (apart from logging an error) if the series is not
       found in a control file section
    Author: PRI
    Date: August 2014
    """
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error(
            "GapFillFromAlternate: Series %s not found in control file, skipping ...",
            series)
        return
    # create the alternate directory in the data structure
    if "alternate" not in dir(ds):
        ds.alternate = {}
    # control file entries for the alternate outputs of this series
    cf_outputs = cf[section][series]["GapFillFromAlternate"]
    # loop over the outputs listed in the control file
    for output in cf_outputs.keys():
        cf_output = cf_outputs[output]
        # create the dictionary for this output
        ds.alternate[output] = {}
        info = ds.alternate[output]
        info["label_tower"] = series
        # source name
        info["source"] = cf_output["source"]
        # site name
        info["site_name"] = ds.globalattributes["site_name"]
        # alternate data file name
        # first, look in the [Files] section for a generic file name; the keys
        # are copied to a list because they are indexed by position below
        file_list = list(cf["Files"].keys())
        lower_file_list = [item.lower() for item in file_list]
        source_lower = info["source"].lower()
        if source_lower in lower_file_list:
            # found a generic file name (case-insensitive match on the source)
            i = lower_file_list.index(source_lower)
            info["file_name"] = cf["Files"][file_list[i]]
        else:
            # no generic file name found, look for a file name in the variable section
            info["file_name"] = cf_output["file_name"]
        # if the file has not already been read, do it now
        if info["file_name"] not in ds_alt:
            ds_alternate = qcio.nc_read_series(info["file_name"],
                                               fixtimestepmethod="round")
            gfalternate_matchstartendtimes(ds, ds_alternate)
            ds_alt[info["file_name"]] = ds_alternate
        # get the type of fit, OLS unless a recognised option is given
        info["fit_type"] = "OLS"
        if "fit" in cf_output:
            if cf_output["fit"].lower() in [
                    "ols", "ols_thru0", "mrev", "replace", "rma", "odr"
            ]:
                info["fit_type"] = cf_output["fit"]
            else:
                logger.info(
                    "gfAlternate: unrecognised fit option for series %s, used OLS",
                    output)
        # correct for lag?  The default is "yes".  An unrecognised option also
        # falls back to the default so that the "lag" key always exists; before
        # this it was left unset and any later read of it would have failed.
        info["lag"] = "yes"
        if "lag" in cf_output:
            lag_option = cf_output["lag"].lower()
            if lag_option in ["no", "false"]:
                info["lag"] = "no"
            elif lag_option not in ["yes", "true"]:
                logger.info(
                    "gfAlternate: unrecognised lag option for series %s, used yes",
                    output)
        # choose specific alternate variable?
        if "usevars" in cf_output:
            info["usevars"] = ast.literal_eval(cf_output["usevars"])
        # alternate data variable name if different from name used in control file
        if "alternate_name" in cf_output:
            info["alternate_name"] = cf_output["alternate_name"]
        else:
            info["alternate_name"] = series
        # results of best fit for plotting later on
        info["results"] = {
            "startdate": [],
            "enddate": [],
            "No. points": [],
            "No. filled": [],
            "r": [],
            "Bias": [],
            "RMSE": [],
            "Frac Bias": [],
            "NMSE": [],
            "Avg (Tower)": [],
            "Avg (Alt)": [],
            "Var (Tower)": [],
            "Var (Alt)": [],
            "Var ratio": []
        }
        # create an empty series in ds if the alternate output series doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = qcutils.MakeEmptySeries(ds, output)
            qcutils.CreateSeries(ds, output, data, flag, attr)
            qcutils.CreateSeries(ds, series + "_composite", data, flag, attr)
def rpLL_createdict(cf, ds, series):
    """
    Purpose:
     Set up the bookkeeping needed to estimate ecosystem respiration with the
     Lasslop method for one series.  The "ERUsingLasslop" options of the
     series are copied from the control file into ds.rpLL[series] and the
     "MergeSeries" source list into ds.merge["standard"][series].
    Usage:
     rpLL_createdict(cf, ds, series)
     where cf is the control file object
           ds is the data structure
           series is the label of the series in the control file
    Side effects:
     - returns without creating anything if the series is not found in a
       control file section or if any driver contains masked (missing) data;
       an error is logged in both cases
     - ds.rpLL and ds.merge are created if they do not exist
     - empty series are created in ds for the Lasslop output and for the
       merge output if they do not exist yet
    Author: PRI
    Date April 2016
    """
    # locate the control file section that holds this series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # nothing to do when the series is not in the control file
        msg = "ERUsingLasslop: Series " + series + " not found in control file, skipping ..."
        log.error(msg)
        return
    lasslop_cf = cf[section][series]["ERUsingLasslop"]
    # the drivers must be complete, give up on the first one with gaps
    drivers = ast.literal_eval(lasslop_cf["drivers"])
    target = lasslop_cf["target"]
    for driver in drivers:
        driver_data, _, _ = qcutils.GetSeriesasMA(ds, driver)
        if numpy.ma.count_masked(driver_data) != 0:
            msg = "ERUsingLasslop: driver " + driver
            msg = msg + " contains missing data, skipping target " + target
            log.error(msg)
            return
    # container for the Lasslop settings of all series
    if "rpLL" not in dir(ds):
        ds.rpLL = {}
    ds.rpLL[series] = {}
    settings = ds.rpLL[series]
    # site name, target, drivers and output label
    settings["site_name"] = ds.globalattributes["site_name"]
    settings["target"] = target
    settings["drivers"] = drivers
    settings["output"] = lasslop_cf["output"]
    # one empty list per statistic, filled in later for plotting
    result_labels = ("startdate", "enddate", "No. points", "r", "Bias", "RMSE",
                     "Frac Bias", "NMSE", "Avg (obs)", "Avg (LT)", "Var (obs)",
                     "Var (LT)", "Var ratio", "m_ols", "b_ols")
    settings["results"] = {label: [] for label in result_labels}
    # step and window sizes in days
    settings["step_size_days"] = int(lasslop_cf["step_size_days"])
    settings["window_size_days"] = int(lasslop_cf["window_size_days"])
    # make sure the output series exists in the data structure
    output_label = settings["output"]
    if output_label not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, output_label)
        qcutils.CreateSeries(ds, output_label, data, Flag=flag, Attr=attr)
    # container for the merge settings
    if "merge" not in dir(ds):
        ds.merge = {}
    if "standard" not in ds.merge.keys():
        ds.merge["standard"] = {}
    ds.merge["standard"][series] = {}
    merge_settings = ds.merge["standard"][series]
    # the merged series is written back to the series itself
    merge_settings["output"] = series
    merge_settings["source"] = ast.literal_eval(
        cf[section][series]["MergeSeries"]["Source"])
    # make sure the merge output series exists in the data structure
    merge_label = merge_settings["output"]
    if merge_label not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, merge_label)
        qcutils.CreateSeries(ds, merge_label, data, Flag=flag, Attr=attr)
qcutils.get_xldatefromdatetime(ds) # get the year, month, day, hour, minute and second qcutils.get_ymdhmsfromdatetime(ds) # put the QC'd, smoothed and interpolated EVI into the data structure flag = numpy.zeros(len(dt_loc), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary( long_name="MODIS EVI, smoothed and interpolated", units="none", horiz_resolution="250m", cutout_size=str(site_cutout), evi_quality_threshold=str(evi_quality_threshold), evi_sd_threshold=str(evi_sd_threshold), evi_interpolate=str(evi_interpolate), evi_smooth_filter=str(evi_smooth_filter), sg_num_points=str(sg_num_points), sg_order=str(sg_num_points)) qcutils.CreateSeries(ds, "EVI", evi_interp2_smooth, flag, attr) attr = qcutils.MakeAttributeDictionary( long_name="MODIS EVI, interpolated", units="none", horiz_resolution="250m", cutout_size=str(site_cutout), evi_quality_threshold=str(evi_quality_threshold), evi_sd_threshold=str(evi_sd_threshold), evi_interpolate=str(evi_interpolate)) qcutils.CreateSeries(ds, "EVI_notsmoothed", evi_interp2, flag, attr) # now write the data structure to a netCDF file out_file = qcio.nc_open_write(out_name) qcio.nc_write_series(out_file, ds, ndims=1)
long_name="DateTime in local time zone", units="None") # add the Excel datetime, year, month etc qcutils.get_xldatefromdatetime(ds_aws_60minute) qcutils.get_ymdhmsfromdatetime(ds_aws_60minute) # loop over the series and take the average (every thing but Precip) or sum (Precip) for item in series_list: if "Precip" in item: data_30minute, flag_30minute, attr = qcutils.GetSeriesasMA( ds_aws_30minute, item, si=si_wholehour, ei=ei_wholehour) data_2d = numpy.reshape(data_30minute, (nRecs_30minute / 2, 2)) flag_2d = numpy.reshape(flag_30minute, (nRecs_30minute / 2, 2)) data_60minute = numpy.ma.sum(data_2d, axis=1) flag_60minute = numpy.ma.max(flag_2d, axis=1) qcutils.CreateSeries(ds_aws_60minute, item, data_60minute, Flag=flag_60minute, Attr=attr) elif "Wd" in item: Ws_30minute, flag_30minute, attr = qcutils.GetSeriesasMA( ds_aws_30minute, item, si=si_wholehour, ei=ei_wholehour) Wd_30minute, flag_30minute, attr = qcutils.GetSeriesasMA( ds_aws_30minute, item, si=si_wholehour, ei=ei_wholehour) U_30minute, V_30minute = qcutils.convert_WsWdtoUV( Ws_30minute, Wd_30minute) U_2d = numpy.reshape(U_30minute, (nRecs_30minute / 2, 2)) V_2d = numpy.reshape(V_30minute, (nRecs_30minute / 2, 2)) flag_2d = numpy.reshape(flag_30minute, (nRecs_30minute / 2, 2)) U_60minute = numpy.ma.sum(U_2d, axis=1) V_60minute = numpy.ma.sum(V_2d, axis=1) Ws_60minute, Wd_60minute = qcutils.convert_UVtoWsWd(
def _l4to6qc_append_level_note(ds, level_key, note):
    """Append note to ds.globalattributes[level_key], creating the attribute if needed."""
    try:
        ds.globalattributes[level_key] = ds.globalattributes[level_key] + ', ' + note
    except Exception:
        # the attribute does not exist yet (or cannot be concatenated): start it
        ds.globalattributes[level_key] = note


def _l4to6qc_record_note(ds, level_key, note):
    """Append note to the 'Functions' global attribute and to the level_key attribute."""
    ds.globalattributes['Functions'] = ds.globalattributes['Functions'] + ', ' + note
    _l4to6qc_append_level_note(ds, level_key, note)


def _l4to6qc_coverage(cf, ds):
    """Update the coverage attributes of ds and, if requested in cf, the series statistics."""
    qcutils.get_coverage_individual(ds)
    qcutils.get_coverage_groups(ds)
    if qcutils.cfkeycheck(cf, Base='Functions', ThisOne='FlagStats'
                          ) and cf['Functions']['FlagStats'] == 'True':
        qcio.get_seriesstats(cf, ds)


def l4to6qc(cf, ds3, AttrLevel, InLevel, OutLevel):
    """
        Fill gaps in met data from other sources
        Integrate SOLO-ANN gap filled fluxes performed externally
        Generates L4 from L3 data
        Generates daily sums excel workbook

        Variable Series:
            Meteorological (MList): Ah_EC, Cc_7500_Av, ps, Ta_EC, Ws_CSAT, Wd_CSAT
            Radiation (RList): Fld, Flu, Fn, Fsd, Fsu
            Soil water content (SwsList): all variables containing Sws in variable name
            Soil (SList): Fg, Ts, SwsList
            Turbulent fluxes (FList): Fc_wpl, Fe_wpl, Fh, ustar
            Output (OList): MList, RList, SList, FList

        Parameters loaded from control file:
            zmd: z-d
            z0: roughness height

        Functions performed:
            qcts.AddMetVars
            qcts.ComputeDailySums
            qcts.InterpolateOverMissing (OList for gaps shorter than 3 observations, OList gaps shorter than 7 observations)
            qcts.GapFillFromAlternate (MList, RList)
            qcts.GapFillFromClimatology (Ah_EC, Fn, Fg, ps, Ta_EC, Ws_CSAT, OList)
            qcts.GapFillFromRatios (Fe, Fh, Fc)
            qcts.ReplaceOnDiff (Ws_CSAT, ustar)
            qcts.UstarFromFh
            qcts.ReplaceWhereMissing (Ustar)
            qcck.do_qcchecks

        Arguments:
            cf: control file object
            ds3: L3 data structure
            AttrLevel: level the attributes are taken from; the string 'False'
                resets ds3's 'Functions' attribute and uses InLevel instead
            InLevel: level of the input data ('L3' uses a copy of ds3, any
                other level is read from the file named in the control file)
            OutLevel: level to produce ('L4', 'L5' or 'L6')

        Returns:
            - a copy of ds3 if the control file has no 'Functions' section
            - ds4 for OutLevel 'L4' (when InLevel is 'L3' or 'L4')
            - (ds4, ds5) for OutLevel 'L5'
            - (ds4, ds5, ds6) for OutLevel 'L6'
            - None for any other combination

        NOTE(review): ds4 is only bound when InLevel == 'L4' or AttrLevel == 'L3'
        but the L5/L6 return statements reference it unconditionally; confirm
        that callers never reach them with ds4 unset.
        NOTE(review): the block nesting of this function was reconstructed from
        a whitespace-collapsed listing; confirm the extent of the
        "InLevel != 'L3'" branch against the repository version.
        """
    if AttrLevel == 'False':
        ds3.globalattributes['Functions'] = ''
        AttrLevel = InLevel
    # check to ensure L4 functions are defined in controlfile
    if not qcutils.cfkeycheck(cf, Base='Functions'):
        log.error('FunctionList not found in control file')
        ds3x = copy.deepcopy(ds3)
        ds3x.globalattributes['nc_level'] = 'L3'
        ds3x.globalattributes['L4Functions'] = 'No L4-L6 functions applied'
        return ds3x
    # x, y and z are passed to and returned by l4qc, l5qc and l6qc; they are
    # still 0 at the end if no L4, L5 or L6 function was recorded
    x = 0
    y = 0
    z = 0
    # handle meta-data and import L4-L6 from external process
    if InLevel == 'L3':
        ds3x = copy.deepcopy(ds3)
    else:
        infilename = qcio.get_infilename_from_cf(cf, InLevel)
        ds3x = qcio.nc_read_series(infilename)
        # global attributes missing from the file are taken from ds3
        for ThisOne in ds3.globalattributes.keys():
            if ThisOne not in ds3x.globalattributes.keys():
                ds3x.globalattributes[ThisOne] = ds3.globalattributes[ThisOne]
        # variable attributes are taken from ds3 except for the descriptive ones
        for ThisOne in ds3.series.keys():
            if ThisOne in ds3x.series.keys():
                for attr in ds3.series[ThisOne]['Attr'].keys():
                    if attr not in [
                            'ancillary_variables', 'long_name',
                            'standard_name', 'units'
                    ]:
                        ds3x.series[ThisOne]['Attr'][attr] = ds3.series[
                            ThisOne]['Attr'][attr]
        ds3x.globalattributes['nc_level'] = AttrLevel
        ds3x.globalattributes['EPDversion'] = sys.version
        ds3x.globalattributes['QC_version_history'] = cfg.__doc__
        # put the control file name into the global attributes
        ds3x.globalattributes['controlfile_name'] = cf['controlfile_name']
        # keep the Excel attributes of the file under level-specific names and
        # restore the ones of ds3
        if OutLevel in ('L6', 'L5', 'L4'):
            for name in ('datemode', 'filename', 'moddatetime'):
                ds3x.globalattributes['xl' + OutLevel + '_' + name] = \
                    ds3x.globalattributes['xl_' + name]
                ds3x.globalattributes['xl_' + name] = \
                    ds3.globalattributes['xl_' + name]
        qcutils.prepOzFluxVars(cf, ds3x)
        # convert Fc [mgCO2 m-2 s-1] to Fc_co2 [mgCO2 m-2 s-1], Fc_c [mgC m-2 s-1], NEE [umol m-2 s-1] and NEP = - NEE
        if qcutils.cfkeycheck(cf, Base='Functions', ThisOne='convertFc'
                              ) and cf['Functions']['convertFc'] == 'True':
            _l4to6qc_append_level_note(ds3x, 'L4Functions', 'convertFc')
            if 'Fc_co2' in ds3x.series.keys():
                qcts.ConvertFc(cf, ds3x, Fco2_in='Fc_co2')
            else:
                qcts.ConvertFc(cf, ds3x)
    ds4x = copy.deepcopy(ds3x)
    # the fluxes are taken from ds3 where both data structures have them
    for ThisOne in ['NEE', 'NEP', 'Fc', 'Fc_co2', 'Fc_c', 'Fe', 'Fh']:
        if ThisOne in ds4x.series.keys() and ThisOne in ds3.series.keys():
            ds4x.series[ThisOne] = ds3.series[ThisOne].copy()
    # the partitioned series are reset to missing with a flag of 1
    for ThisOne in [
            'GPP', 'CE', 'ER_night', 'ER_dark', 'CE_day', 'CE_NEEmax',
            'ER_bio', 'PD', 'ER_n', 'ER_LRF'
    ]:
        if ThisOne in ds4x.series.keys():
            ds4x.series[ThisOne]['Data'] = numpy.ones(
                len(ds4x.series[ThisOne]['Data']),
                dtype=numpy.float64) * numpy.float64(c.missing_value)
            ds4x.series[ThisOne]['Flag'] = numpy.ones(
                len(ds4x.series[ThisOne]['Data']), dtype=numpy.int32)
    if InLevel == 'L4' or AttrLevel == 'L3':
        ds4, x = l4qc(cf, ds4x, InLevel, x)
        _l4to6qc_coverage(cf, ds4)
    if OutLevel == 'L5' or OutLevel == 'L6':
        try:
            ds4y = copy.deepcopy(ds4)
        except Exception:
            # ds4 was not produced above (or cannot be copied), start from ds4x
            ds4y = copy.deepcopy(ds4x)
        # the fluxes going into L5 are those of ds3x
        for ThisOne in ['NEE', 'NEP', 'Fc', 'Fc_c', 'Fc_co2', 'Fe', 'Fh']:
            var, var_flag, var_attr = qcutils.GetSeriesasMA(ds3x, ThisOne)
            qcutils.CreateSeries(ds4y,
                                 ThisOne,
                                 var,
                                 Flag=var_flag,
                                 Attr=var_attr)
            ds4y.series[ThisOne]['Attr']['long_name'] = var_attr['long_name']
        ds5, y = l5qc(cf, ds4y, y)
        _l4to6qc_coverage(cf, ds5)
    if OutLevel == 'L6':
        ds5z = copy.deepcopy(ds5)
        # the partitioned series going into L6 are those of ds3x
        for ThisOne in [
                'GPP', 'CE', 'ER_night', 'ER_dark', 'CE_day', 'CE_NEEmax',
                'ER_bio', 'PD', 'ER_n', 'ER_LRF'
        ]:
            if ThisOne in ds3x.series.keys():
                ds5z.series[ThisOne] = ds3x.series[ThisOne].copy()
        ds6, z = l6qc(cf, ds5z, z)
        _l4to6qc_coverage(cf, ds6)
    # calculate daily statistics
    if qcutils.cfkeycheck(cf, Base='Functions', ThisOne='Sums'):
        if cf['Functions']['Sums'] == 'L6':
            _l4to6qc_record_note(ds6, 'L6Functions', 'Sums')
            qcts.do_sums(cf, ds6)
        elif cf['Functions']['Sums'] == 'L5':
            _l4to6qc_record_note(ds5, 'L5Functions', 'Sums')
            qcts.do_sums(cf, ds5)
        elif cf['Functions']['Sums'] == 'L4':
            # the existing 'L4Functions' list is extended here; it used to be
            # rebuilt from 'L5Functions', which discarded the L4 record
            _l4to6qc_record_note(ds4, 'L4Functions', 'Sums')
            qcts.do_sums(cf, ds4)
    # compute climatology
    if qcutils.cfkeycheck(cf, Base='Functions', ThisOne='climatology'):
        if cf['Functions']['climatology'] == 'L6':
            _l4to6qc_record_note(ds6, 'L6Functions', 'climatology')
            qcts.do_climatology(cf, ds6)
        elif cf['Functions']['climatology'] == 'L5':
            _l4to6qc_record_note(ds5, 'L5Functions', 'climatology')
            qcts.do_climatology(cf, ds5)
        elif cf['Functions']['climatology'] == 'L4':
            _l4to6qc_record_note(ds4, 'L4Functions', 'climatology')
            qcts.do_climatology(cf, ds4)
    # record the levels at which nothing was done and return the results
    if OutLevel == 'L4' and (InLevel == 'L3' or InLevel == 'L4'):
        if x == 0:
            _l4to6qc_record_note(ds4, 'L4Functions',
                                 'No further L4 gapfilling')
            log.warning(' L4: no record of gapfilling functions')
        return ds4
    elif OutLevel == 'L5':
        if x == 0:
            if InLevel == 'L3' or InLevel == 'L4':
                _l4to6qc_record_note(ds4, 'L4Functions',
                                     'No further L4 gapfilling')
                log.warning(' L4: no record of gapfilling functions')
            _l4to6qc_record_note(ds5, 'L4Functions',
                                 'No further L4 gapfilling')
        if y == 0:
            _l4to6qc_record_note(ds5, 'L5Functions',
                                 'No further L5 gapfilling')
            log.warning(' L5: no record of gapfilling functions')
        return ds4, ds5
    elif OutLevel == 'L6':
        if x == 0:
            if InLevel == 'L3' or InLevel == 'L4':
                _l4to6qc_record_note(ds4, 'L4Functions',
                                     'No further L4 gapfilling')
                log.warning(' L4: no record of gapfilling functions')
            if InLevel == 'L3' or InLevel == 'L4' or InLevel == 'L5':
                _l4to6qc_record_note(ds5, 'L4Functions',
                                     'No further L4 gapfilling')
                log.warning(' L4: no record of gapfilling functions')
            _l4to6qc_record_note(ds6, 'L4Functions',
                                 'No further L4 gapfilling')
        if y == 0:
            if InLevel == 'L3' or InLevel == 'L4' or InLevel == 'L5':
                _l4to6qc_record_note(ds5, 'L5Functions',
                                     'No further L5 gapfilling')
                log.warning(' L5: no record of gapfilling functions')
            _l4to6qc_record_note(ds6, 'L5Functions',
                                 'No further L5 gapfilling')
        if z == 0:
            # ds6's own 'L6Functions' list is extended; it used to be built
            # from the attribute of ds5
            _l4to6qc_record_note(ds6, 'L6Functions',
                                 'No further L6 partitioning')
            log.warning(' L6: no record of gapfilling functions')
        return ds4, ds5, ds6
idx = numpy.searchsorted(ldt_all, numpy.intersect1d(ldt_all, ldt_one)) # then we get a list of the variables to copy series_list = ds_out[i].series.keys() # and remove the datetime if "DateTime" in series_list: series_list.remove("DateTime") # and then we loop over the variables to be copied for label in series_list: # append a number, unique to each ISD station, to the variable label all_label = label + "_" + str(i) # create empty data and flag arrays variable = qcutils.create_empty_variable(all_label, nrecs) qcutils.CreateSeries(ds_all, all_label, variable["Data"], Flag=variable["Flag"], Attr=variable["Attr"]) # read the data out of the ISD site data structure data, flag, attr = qcutils.GetSeriesasMA(ds_out[i], label) # add the ISD site ID attr["isd_site_id"] = isd_site_id # put the data, flag and attributes into the all-in-one data structure ds_all.series[all_label]["Data"][idx] = data ds_all.series[all_label]["Flag"][idx] = flag ds_all.series[all_label]["Attr"] = copy.deepcopy(attr) # write the netCDF file with the combined data for this year if len(fluxnet_id) == 0: nc_dir_path = os.path.join(out_base_path, site, "Data", "ISD") nc_file_name = site + "_ISD_" + str(year) + ".nc" else:
def interpolate_ds(ds_in, ts, k=3):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
     A new data structure is returned, ds_in is not modified.
    Assumptions:
     The output time axis starts on the whole hour containing the first input
     time stamp minus 30 minutes and is generated by perdelta() up to the
     whole hour containing the last input time stamp plus 30 minutes.
     Series with "Precip" in their label are not interpolated: values are
     copied to the output record with the same time stamp, all other output
     records are 0.  Input values whose time stamp is not on the output time
     axis are not copied.
     All other series are interpolated with interpolate_1d().
     All output flags are 0.
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
     where ds_in is the input data structure
           ts is the output time step in minutes
           k is accepted for compatibility, it is not used by this function
    Author: PRI
    Date: June 2017
    """
    # instance the output data structure
    ds_out = qcio.DataStructure()
    # copy the global attributes
    for key in ds_in.globalattributes.keys():
        ds_out.globalattributes[key] = ds_in.globalattributes[key]
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # generate a regular time series at the required time step
    dt = ds_in.series["DateTime"]["Data"]
    half_hour = datetime.timedelta(minutes=30)
    dt0 = dt[0] - half_hour
    start = datetime.datetime(dt0.year, dt0.month, dt0.day, dt0.hour, 0, 0)
    dt1 = dt[-1] + half_hour
    end = datetime.datetime(dt1.year, dt1.month, dt1.day, dt1.hour, 0, 0)
    idt = list(perdelta(start, end, datetime.timedelta(minutes=ts)))
    nrecs = len(idt)
    # input (x1) and output (x2) times as time stamps
    x1 = numpy.array([toTimestamp(item) for item in dt])
    x2 = numpy.array([toTimestamp(item) for item in idt])
    # put the output datetime into the output data structure
    ds_out.series["DateTime"] = {}
    ds_out.series["DateTime"]["Data"] = idt
    # flags are integers, as for the other series in this file
    ds_out.series["DateTime"]["Flag"] = numpy.zeros(nrecs, dtype=numpy.int32)
    ds_out.series["DateTime"]["Attr"] = {
        "long_name": "Datetime",
        "units": "none"
    }
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # loop over the series in the data structure and interpolate
    series_list = list(ds_in.series.keys())
    if "DateTime" in series_list:
        series_list.remove("DateTime")
    for label in series_list:
        data_in, _, attr_in = qcutils.GetSeriesasMA(ds_in, label)
        # check if we are dealing with precipitation
        if "Precip" in label:
            # precipitation shouldn't be interpolated, just assign any precipitation
            # to the ISD time stamp.
            data_out = numpy.ma.zeros(nrecs, dtype=numpy.float64)
            # only input records whose time stamp is on the output time axis
            # can be assigned; selecting them on both sides keeps the two
            # arrays the same length (assigning all of data_in failed with a
            # shape mismatch as soon as one input time stamp did not match)
            matched = numpy.isin(x1, x2)
            idx = numpy.searchsorted(x2, x1[matched])
            data_out[idx] = data_in[matched]
        else:
            # interpolate everything else
            data_out = interpolate_1d(x1, data_in, x2)
        flag_out = numpy.zeros(nrecs, dtype=numpy.int32)
        # NOTE: the attribute dictionary is shared with ds_in, not copied
        attr_out = attr_in
        qcutils.CreateSeries(ds_out,
                             label,
                             data_out,
                             Flag=flag_out,
                             Attr=attr_out)
    return ds_out
def ApplyTurbulenceFilter(cf, ds, ustar_threshold=None):
    """
    Purpose:
     Mask the series listed in opt["filter_list"] wherever the final
     turbulence indicator is 0.
     The options are read from ApplyTurbulenceFilter_checks(cf, ds); the
     keys used here are "OK", "turbulence_filter", "accept_day_times",
     "use_evening_filter" and "filter_list".
     The indicator series are written to the data structure as
     "turbulence_indicator", "day_indicator", "evening_indicator",
     "dayevening_indicator" and "final_indicator".
     For every filtered series X:
      - the unfiltered data is saved as X_nofilter
      - X is overwritten with the masked data, rejected points get QC flag 61
      - a copy of the masked data is saved as X_filtered
     Series whose attributes already contain "turbulence_filter" are skipped.
    Usage:
     ApplyTurbulenceFilter(cf, ds)
     ApplyTurbulenceFilter(cf, ds, ustar_threshold=value)
     where cf is a control file object
           ds is a data structure
           ustar_threshold is passed to qcrp.get_ustar_thresholds_annual()
                           when given, otherwise the thresholds come from
                           qcrp.get_ustar_thresholds(cf, ldt)
    Returns:
     None, ds is modified in place.
    """
    opt = ApplyTurbulenceFilter_checks(cf, ds)
    if not opt["OK"]:
        return
    # local pointer to the datetime series
    ldt = ds.series["DateTime"]["Data"]
    # time step
    ts = int(ds.globalattributes["time_step"])
    # dictionary of ustar threshold values
    if ustar_threshold is None:
        ustar_dict = qcrp.get_ustar_thresholds(cf, ldt)
    else:
        ustar_dict = qcrp.get_ustar_thresholds_annual(ldt, ustar_threshold)
    # initialise a dictionary for the indicator series, each entry is a
    # dictionary with a "values" array and an "attr" dictionary
    indicators = {}
    # get data for the indicator series
    ustar, _, _ = qcutils.GetSeriesasMA(ds, "ustar")
    Fsd, _, _ = qcutils.GetSeriesasMA(ds, "Fsd")
    if "solar_altitude" not in ds.series.keys():
        qcts.get_synthetic_fsd(ds)
    Fsd_syn, _, _ = qcutils.GetSeriesasMA(ds, "Fsd_syn")
    sa, _, _ = qcutils.GetSeriesasMA(ds, "solar_altitude")
    # get the day/night indicator series
    # indicators["day"] = 1 ==> day time, indicators["day"] = 0 ==> night time
    indicators["day"] = qcrp.get_day_indicator(cf, Fsd, Fsd_syn, sa)
    ind_day = indicators["day"]["values"]
    # get the turbulence indicator series
    filter_name = opt["turbulence_filter"].lower()
    if filter_name == "ustar":
        # indicators["turbulence"] = 1 ==> turbulent, indicators["turbulence"] = 0 ==> not turbulent
        indicators["turbulence"] = qcrp.get_turbulence_indicator_ustar(ldt, ustar, ustar_dict, ts)
    elif filter_name == "ustar_evg":
        # ustar >= threshold ==> ind_ustar = 1, ustar < threshold == ind_ustar = 0
        indicators["ustar"] = qcrp.get_turbulence_indicator_ustar(ldt, ustar, ustar_dict, ts)
        ind_ustar = indicators["ustar"]["values"]
        # ustar >= threshold during day AND ustar has been >= threshold since sunset ==> indicators["turbulence"] = 1
        # indicators["turbulence"] = 0 during night once ustar has dropped below threshold even if it
        # increases above the threshold later in the night
        indicators["turbulence"] = qcrp.get_turbulence_indicator_ustar_evg(ldt, ind_day, ind_ustar, ustar, ustar_dict, ts)
    elif filter_name == "l":
        # A filter based on the Monin-Obukhov length is not implemented, so
        # every record is treated as turbulent.  The indicator must be a
        # dictionary with "values" and "attr" keys like the other indicators
        # because the code below indexes both; a bare array raised IndexError.
        indicators["turbulence"] = {"values": numpy.ones(len(ldt)), "attr": {}}
        msg = " Use of L as turbulence indicator not implemented, no filter applied"
        log.warning(msg)
    else:
        msg = " Unrecognised turbulence filter option ("
        msg = msg+opt["turbulence_filter"]+"), no filter applied"
        log.error(msg)
        return
    # initialise the final indicator series as the turbulence indicator
    # subsequent filters will modify the final indicator series
    # we must use copy.deepcopy() so that the "values" array and the "attr"
    # dictionary of the turbulence indicator are not modified below
    indicators["final"] = copy.deepcopy(indicators["turbulence"])
    # check to see if the user wants to accept all day time observations
    # regardless of ustar value
    if opt["accept_day_times"].lower() == "yes":
        # if yes, then we force the final indicator to be 1
        # if ustar is below the threshold during the day.
        idx = numpy.where(ind_day == 1)[0]
        # plain int, the numpy.int alias was removed in NumPy 1.24
        indicators["final"]["values"][idx] = 1
        indicators["final"]["attr"].update(indicators["day"]["attr"])
    # get the evening indicator series
    indicators["evening"] = qcrp.get_evening_indicator(cf, Fsd, Fsd_syn, sa, ts)
    indicators["dayevening"] = {"values": ind_day + indicators["evening"]["values"]}
    indicators["dayevening"]["attr"] = indicators["day"]["attr"].copy()
    indicators["dayevening"]["attr"].update(indicators["evening"]["attr"])
    if opt["use_evening_filter"].lower() == "yes":
        idx = numpy.where(indicators["dayevening"]["values"] == 0)[0]
        indicators["final"]["values"][idx] = 0
        indicators["final"]["attr"].update(indicators["dayevening"]["attr"])
    # save the indicator series
    ind_flag = numpy.zeros(len(ldt))
    indicator_series = [
        ("turbulence_indicator", "turbulence",
         "Turbulence indicator, 1 for turbulent, 0 for non-turbulent"),
        ("day_indicator", "day",
         "Day indicator, 1 for day time, 0 for night time"),
        ("evening_indicator", "evening",
         "Evening indicator, 1 for evening, 0 for not evening"),
        ("dayevening_indicator", "dayevening",
         "Day/evening indicator, 1 for day/evening, 0 for not day/evening"),
        ("final_indicator", "final",
         "Final indicator, 1 for use data, 0 for don't use data"),
    ]
    for label, key, long_name in indicator_series:
        ind_attr = qcutils.MakeAttributeDictionary(long_name=long_name, units="None")
        qcutils.CreateSeries(ds, label, indicators[key]["values"], Flag=ind_flag, Attr=ind_attr)
    # the rejection mask is the same for every series so work it out once
    rejected = indicators["final"]["values"] == 0
    idx_rejected = numpy.where(rejected)[0]
    # loop over the series to be filtered
    for series in opt["filter_list"]:
        msg = " Applying "+opt["turbulence_filter"]+" filter to "+series
        log.info(msg)
        # get the data
        data, flag, attr = qcutils.GetSeriesasMA(ds, series)
        # continue to next series if this series has been filtered before
        if "turbulence_filter" in attr:
            msg = " Series "+series+" has already been filtered, skipping ..."
            log.warning(msg)
            continue
        # save the non-filtered data
        qcutils.CreateSeries(ds, series+"_nofilter", data, Flag=flag, Attr=attr)
        # now apply the filter
        data_filtered = numpy.ma.masked_where(rejected, data, copy=True)
        flag_filtered = numpy.copy(flag)
        flag_filtered[idx_rejected] = numpy.int32(61)
        # update the series attributes
        attr.update(indicators["final"]["attr"])
        # and write the filtered data to the data structure
        qcutils.CreateSeries(ds, series, data_filtered, Flag=flag_filtered, Attr=attr)
        # and write a copy of the filtered data to the data structure so it
        # will still exist once the gap filling has been done
        qcutils.CreateSeries(ds, series+"_filtered", data_filtered, Flag=flag_filtered, Attr=attr)
    return
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
     Mask a series wherever any of its precursor series has a QC flag that
     is not a multiple of 10, and set the QC flag of the masked points to
     "code".
     The precursor series are listed in the "Source" key of the
     "DependencyCheck" entry for the series in the control file.  If the
     "ignore_missing" key of that entry is yes/y/true/t then precursor
     points equal to the missing value are not used to mask the series.
     Nothing is done if:
      - both section and series are empty
      - section is empty and no control file section is found for the series
      - the series has no "DependencyCheck" entry in the control file
      - the "DependencyCheck" entry has no "Source" key (an error is logged)
     Precursor series not found in the data structure are skipped with a
     warning.
    Usage:
     do_dependencycheck(cf, ds, section, series)
     where cf is a control file object
           ds is a data structure
           section is the control file section containing series, if it
                   is empty the section is looked up from the series name
           series is the label of the series to be checked
           code is the QC flag value given to masked points (default 23)
           mode is "verbose" to log a message for each series checked
    Returns:
     None, ds is modified in place.
    Author: PRI
    Date: Back in the day
    """
    if len(section) == 0 and len(series) == 0:
        return
    if len(section) == 0:
        section = qcutils.get_cfsection(cf, series=series, mode='quiet')
        # the series is not in any control file section so there is
        # nothing to check, without this cf[section] raises a KeyError
        if len(section) == 0:
            return
    if "DependencyCheck" not in cf[section][series].keys():
        return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series " + series + ", skipping ..."
        logger.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for " + series
        logger.info(msg)
    # get the precursor source list from the control file
    source_list = ast.literal_eval(
        cf[section][series]["DependencyCheck"]["Source"])
    # check to see if the "ignore_missing" flag is set
    opt = qcutils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                     "ignore_missing", default="no")
    ignore_missing = opt.lower() in ["yes", "y", "true", "t"]
    # get the data
    dependent_data, dependent_flag, dependent_attr = qcutils.GetSeries(
        ds, series)
    # loop over the precursor source list
    for item in source_list:
        # check the precursor is in the data structure
        if item not in ds.series.keys():
            msg = " DependencyCheck: " + series + " precursor series " + item + " not found, skipping ..."
            logger.warning(msg)
            continue
        # get the precursor data
        precursor_data, precursor_flag, precursor_attr = qcutils.GetSeries(
            ds, item)
        # check if the user wants to ignore missing precursor data
        if ignore_missing:
            # they do, so find the indices of elements equal to the missing
            # value, the scalar is broadcast against the precursor data
            is_missing = numpy.isclose(precursor_data, float(c.missing_value))
            idx = numpy.where(is_missing)[0]
            # and set these flags to 0 so missing data is ignored
            precursor_flag[idx] = numpy.int32(0)
        # True where the precursor flag shows data not OK
        not_ok = numpy.mod(precursor_flag, 10) != 0
        # mask the dependent data where the precursor flag shows data not OK
        dependent_data = numpy.ma.masked_where(not_ok, dependent_data)
        # get an index where the precursor flag shows data not OK
        idx = numpy.ma.where(not_ok)[0]
        # set the dependent QC flag
        dependent_flag[idx] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    qcutils.CreateSeries(ds, series, dependent_data, dependent_flag,
                         dependent_attr)
    # our work here is done
    return
time_units = getattr(bios_ncfile.variables["time"],"units") qcutils.get_datetimefromnctime(ds_30,time,time_units) qcutils.round_datetime(ds_30,mode="nearest_timestep") if qcutils.CheckTimeStep(ds_30): qcutils.FixTimeStep(ds_30) ldt_30 = ds_30.series["DateTime"]["Data"] si = qcutils.GetDateIndex(ldt_30,start_date,default=0,ts=ts,match="startnexthour") ei = qcutils.GetDateIndex(ldt_30,end_date,default=len(ldt_30),ts=ts,match="endprevioushour") ds_30.series["DateTime"]["Data"] = ds_30.series["DateTime"]["Data"][si:ei+1] ds_30.series["DateTime"]["Flag"] = ds_30.series["DateTime"]["Flag"][si:ei+1] ldt_30 = ds_30.series["DateTime"]["Data"] nRecs = ds_30.globalattributes["nc_nrecs"] = len(ldt_30) flag = numpy.zeros(nRecs) qcutils.get_ymdhmsfromdatetime(ds_30) xl_date_loc = qcutils.get_xldatefromdatetime(ds_30) attr = qcutils.MakeAttributeDictionary(long_name="Date/time (local) in Excel format",units="days since 1899-12-31 00:00:00") qcutils.CreateSeries(ds_30,"xlDateTime",xl_date_loc,flag,attr) # get the data for label in var_list: bios_name = cf["Variables"][label]["bios_name"] if len(bios_ncfile.variables[bios_name].shape)==1: #print label+" has 1 dimension" data = bios_ncfile.variables[bios_name][:][si:ei+1] elif len(bios_ncfile.variables[bios_name].shape)==2: #print label+" has 2 dimensions" data = bios_ncfile.variables[bios_name][:,0][si:ei+1] elif len(bios_ncfile.variables[bios_name].shape)==3: #print label+" has 3 dimensions" data = bios_ncfile.variables[bios_name][:,0,0][si:ei+1] attr = {} for this_attr in bios_ncfile.variables[bios_name].ncattrs(): attr[this_attr] = getattr(bios_ncfile.variables[bios_name],this_attr)
alt_solar_limit = float(site_sa_limit) * numpy.ones(len(alt_solar_3hr)) sa = numpy.where(alt_solar_3hr <= float(site_sa_limit), alt_solar_limit, alt_solar_3hr) coef_3hr = Fsd_erai_3hr / numpy.sin(numpy.deg2rad(sa)) # get the spline interpolation function s = InterpolatedUnivariateSpline(erai_time_3hr, coef_3hr, k=1) # get the coefficient at the tower time step coef_tts = s(erai_time_tts) # get the downwelling solar radiation at the tower time step Fsd_erai_tts = coef_tts * numpy.sin(numpy.deg2rad(alt_solar_tts)) flag = numpy.zeros(len(Fsd_erai_tts), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary( long_name="Downwelling short wave radiation", units="W/m2") qcutils.CreateSeries(ds_erai, "Fsd", Fsd_erai_tts, Flag=flag, Attr=attr) # Interpolate the 3 hourly accumulated net shortwave to the tower time step # NOTE: ERA-I variables are dimensioned [time,latitude,longitude] Fn_sw_3d = erai_file.variables["ssr"][:, :, :] Fn_sw_accum = Fn_sw_3d[:, site_lat_index, site_lon_index] # Net shortwave in ERA-I is a cummulative value that is reset to 0 at 0300 and 1500 UTC. # Here we convert the cummulative values to 3 hourly values. Fn_sw_erai_3hr = numpy.ediff1d(Fn_sw_accum, to_begin=0) # deal with the reset times at 0300 and 1500 idx = numpy.where((hour_utc == 3) | (hour_utc == 15))[0] Fn_sw_erai_3hr[idx] = Fn_sw_accum[idx] # get the average value over the 3 hourly period Fn_sw_erai_3hr = Fn_sw_erai_3hr / (erai_timestep * 60)
def gfSOLO_createdict(cf, ds, series):
    """
    Purpose:
     Creates a dictionary in ds (ds.solo) to hold information about the
     SOLO data used to gap fill the tower data.
     One entry is created in ds.solo for every output listed under the
     "GapFillUsingSOLO" entry of the series in the control file.  An empty
     series is created in ds for any output that does not exist yet.
     An error is logged and nothing is done if no control file section is
     found for the series.
    Usage:
     gfSOLO_createdict(cf, ds, series)
     where cf is a control file object
           ds is a data structure
           series is the label of the series to be gap filled
    Returns:
     None, ds is modified in place.
    """
    # names of the statistics stored for each output, every one starts
    # out as an empty list
    result_keys = ("startdate", "enddate", "No. points", "r", "Bias", "RMSE",
                   "Frac Bias", "NMSE", "Avg (obs)", "Avg (SOLO)",
                   "Var (obs)", "Var (SOLO)", "Var ratio", "m_ols", "b_ols")
    # name and type of each entry in the "solo_settings" list, in order
    settings_layout = (("nodes_target", int), ("training", int),
                       ("factor", int), ("learningrate", float),
                       ("iterations", int))
    # find the control file section that contains the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    if len(section) == 0:
        # the series isn't in the control file so there is nothing to do
        logger.error(
            "GapFillUsingSOLO: Series %s not found in control file, skipping ...",
            series)
        return
    # make sure the data structure has a SOLO dictionary
    if not hasattr(ds, "solo"):
        ds.solo = {}
    # control file entries for the SOLO outputs of this series
    solo_cf = cf[section][series]["GapFillUsingSOLO"]
    for output in solo_cf.keys():
        output_cf = solo_cf[output]
        # the dictionary for this output, filled in below
        info = {}
        ds.solo[output] = info
        # the target defaults to the series itself
        if "target" in output_cf:
            info["label_tower"] = output_cf["target"]
        else:
            info["label_tower"] = series
        # site name
        info["site_name"] = ds.globalattributes["site_name"]
        # SOLO settings are optional
        if "solo_settings" in output_cf:
            ss_list = ast.literal_eval(output_cf["solo_settings"])
            settings = {}
            info["solo_settings"] = settings
            for position, (name, convert) in enumerate(settings_layout):
                settings[name] = convert(ss_list[position])
        # list of drivers
        info["drivers"] = ast.literal_eval(output_cf["drivers"])
        # filter options, an empty string when not given in the control file
        for option in ("turbulence_filter", "daynight_filter"):
            info[option] = qcutils.get_keyvaluefromcf(
                cf, [section, series, "GapFillUsingSOLO", output],
                option, default="")
        # results of best fit for plotting later on
        info["results"] = dict((key, []) for key in result_keys)
        # nothing more to do if the SOLO output series already exists
        if output in ds.series.keys():
            continue
        # otherwise create an empty series in ds for the output
        data, flag, attr = qcutils.MakeEmptySeries(ds, output)
        qcutils.CreateSeries(ds, output, data, flag, attr)
# get the year, month, day, hour, minute and second qcutils.get_ymdhmsfromdatetime(ds) # put the QC'd, smoothed and interpolated EVI into the data structure flag = numpy.zeros(len(dt_loc), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary( long_name="MODIS EVI, smoothed and interpolated", units="none", horiz_resolution="250m", cutout_size=str(site_cutout), evi_quality_threshold=str(evi_quality_threshold), evi_sd_threshold=str(evi_sd_threshold), evi_interpolate=str(evi_interpolate), evi_smooth_filter=str(evi_smooth_filter), sg_num_points=str(sg_num_points), sg_order=str(sg_num_points)) qcutils.CreateSeries(ds, "EVI", evi_interp2_smooth, Flag=flag, Attr=attr) attr = qcutils.MakeAttributeDictionary( long_name="MODIS EVI, interpolated", units="none", horiz_resolution="250m", cutout_size=str(site_cutout), evi_quality_threshold=str(evi_quality_threshold), evi_sd_threshold=str(evi_sd_threshold), evi_interpolate=str(evi_interpolate)) qcutils.CreateSeries(ds, "EVI_notsmoothed", evi_interp2, Flag=flag, Attr=attr) # now write the data structure to a netCDF file
logging.error("Requested variable " + access_name + " not found in ACCESS data") continue attr = {} for this_attr in f.varattr[access_name].keys(): attr[this_attr] = f.varattr[access_name][this_attr] attr["missing_value"] = c.missing_value # loop over all ACCESS grids and give them standard OzFlux names with the grid idices appended for i in range(0, 3): for j in range(0, 3): if len(f.variables[access_name].shape) == 3: var_ij = var + '_' + str(i) + str(j) series = f.variables[access_name][:, i, j] qcutils.CreateSeries(ds_60minutes, var_ij, series, Flag=flag_60minutes, Attr=attr) elif len(f.variables[access_name].shape) == 4: var_ij = var + '_' + str(i) + str(j) series = f.variables[access_name][:, 0, i, j] qcutils.CreateSeries(ds_60minutes, var_ij, series, Flag=flag_60minutes, Attr=attr) else: print "Unrecognised variable (" + var + ") dimension in ACCESS file" #sys.exit() # trap valid_date==0 occurrences, these happened in some of the files produced # in the second batch while the accum_prcp was being sorted out