def interpolate_ds(ds_in, ts):
    """
    Purpose:
     Build a new data structure containing the contents of ds_in interpolated
     onto a regular time axis with a step of ts minutes.
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
     where ds_in is the input data structure (must contain "DateTime")
           ts is the output time step in minutes
    Side effects:
     Returns a new data structure; the global attributes are deep-copied from
     ds_in and "time_step" and "nc_nrecs" are overwritten for the new axis.
    Author: PRI
    Date: June 2017
    """
    logger.info("Interpolating data")
    # the output starts as an empty data structure with the input's global
    # attributes and the new time step
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    ds_out.globalattributes["time_step"] = str(ts)
    # build the regular output time axis; the rounded end points are pulled
    # inwards by one step when rounding moved them outside the input period
    times_in = ds_in.series["DateTime"]["Data"]
    step = datetime.timedelta(minutes=ts)
    first = qcutils.rounddttots(times_in[0], ts=ts)
    if first < times_in[0]:
        first += step
    last = qcutils.rounddttots(times_in[-1], ts=ts)
    if last > times_in[-1]:
        last -= step
    times_out = list(qcutils.perdelta(first, last, step))
    n_out = len(times_out)
    # abscissae for the interpolation
    x1 = numpy.array([toTimestamp(t) for t in times_in])
    x2 = numpy.array([toTimestamp(t) for t in times_out])
    # put the new datetime series into the output data structure
    qcutils.CreateVariable(ds_out, {
        "Label": "DateTime",
        "Data": times_out,
        "Flag": numpy.zeros(n_out, dtype=numpy.int32),
        "Attr": {"long_name": "Datetime", "units": "none"},
    })
    qcutils.get_nctime_from_datetime(ds_out)
    ds_out.globalattributes["nc_nrecs"] = n_out
    # interpolate the meteorological series; the flag is 1 where the
    # interpolated value is masked and 0 elsewhere
    zeros = numpy.zeros(n_out, dtype=numpy.int32)
    ones = numpy.ones(n_out, dtype=numpy.int32)
    for label in ("Ta", "Td", "ps", "RH", "Ah", "q"):
        var_out = qcutils.create_empty_variable(label, n_out, datetime=times_out)
        var_in = qcutils.GetVariable(ds_in, label)
        var_out["Data"] = interpolate_1d(x1, var_in["Data"], x2)
        is_masked = numpy.ma.getmaskarray(var_out["Data"])
        var_out["Flag"] = numpy.where(is_masked, ones, zeros)
        var_out["Attr"] = copy.deepcopy(var_in["Attr"])
        qcutils.CreateVariable(ds_out, var_out)
    # limit the dew point temperature so that it never exceeds air temperature
    air = qcutils.GetVariable(ds_out, "Ta")
    dew = qcutils.GetVariable(ds_out, "Td")
    dew_not_above_air = dew["Data"] <= air["Data"]
    dew["Data"] = numpy.ma.where(dew_not_above_air, dew["Data"], air["Data"])
    qcutils.CreateVariable(ds_out, dew)
    # wind speed and direction are interpolated via their U and V components
    interpolate_wswd(ds_in, x1, ds_out, x2)
    # precipitation is handled last by its own routine
    interpolate_precip(ds_in, x1, ds_out, x2)
    return ds_out
def gfMDS_make_data_array(ds, current_year, info):
    """
    Purpose:
     Create a data array for the MDS gap filling routine.  The array
     constructed here will be written to a CSV file that is read by the
     MDS C code.
    Usage:
     data, header, fmt = gfMDS_make_data_array(ds, current_year, info)
     where ds is a data structure with the global attributes "nc_nrecs"
              and "time_step" (minutes)
           current_year is the (integer) year to build the array for
           info is a dictionary with the keys "target", "drivers",
                "target_mds" and "drivers_mds"
     and data is a 2D array; column 0 is the timestamp as an integer
                YYYYMMDDHHMM, column 1 is the target and the remaining
                columns are the drivers
         header is the comma separated column header for the CSV file
         fmt is the matching comma separated format string
    Side effects:
     The constructed data arrays are full years.  That is they run from
     one time step after YYYY-01-01 00:00 (00:30 for 30 minute data) to
     YYYY+1-01-01 00:00.
     Missing data is represented as -9999.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = int(ds.globalattributes["nc_nrecs"])
    ts = int(ds.globalattributes["time_step"])
    step = datetime.timedelta(minutes=ts)
    # the first record of the year is one time step after midnight; deriving
    # it from the time step (rather than assuming 00:30) keeps the grid
    # aligned with the data for time steps other than 30 minutes
    start = datetime.datetime(current_year, 1, 1, 0, 0, 0) + step
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array(list(qcutils.perdelta(start, end, step)))
    mt = numpy.ones(len(cdt)) * float(-9999)
    pfp_label_list = [info["target"]] + info["drivers"]
    mds_label_list = [info["target_mds"]] + info["drivers_mds"]
    # one column for the timestamp plus one, pre-filled with -9999, for the
    # target and each of the drivers
    data = numpy.stack([cdt] + [mt] * len(pfp_label_list), axis=-1)
    # restrict the input data to the current year and find where its
    # timestamps fall in the full-year series
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    header_parts = ["TIMESTAMP"]
    fmt_parts = ["%12i"]
    for n, label in enumerate(pfp_label_list):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
        header_parts.append(mds_label_list[n])
        fmt_parts.append("%f")
    header = ",".join(header_parts)
    fmt = ",".join(fmt_parts)
    # replace the datetime objects with integer timestamps (YYYYMMDDHHMM)
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data, header, fmt
def make_data_array(ds, current_year):
    """
    Purpose:
     Create a full-year data array holding the timestamp and the series
     Fc, VPD, ustar, Ta, Fsd, Fh and Fe (in that column order).
    Usage:
     data = make_data_array(ds, current_year)
     where ds is a data structure with the global attributes "nc_nrecs"
              and "time_step" (minutes)
           current_year is the (integer) year to build the array for
     and data is a 2D array; column 0 is the timestamp as an integer
              YYYYMMDDHHMM and columns 1 to 7 are the series listed above
    Side effects:
     The array covers the full year, from one time step after
     YYYY-01-01 00:00 (00:30 for 30 minute data) to YYYY+1-01-01 00:00.
     Rows with no matching input record keep the initial value of -9999.
    """
    labels = ("Fc", "VPD", "ustar", "Ta", "Fsd", "Fh", "Fe")
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = int(ds.globalattributes["nc_nrecs"])
    ts = int(ds.globalattributes["time_step"])
    step = datetime.timedelta(minutes=ts)
    # the first record of the year is one time step after midnight; deriving
    # it from the time step (rather than assuming 00:30) keeps the grid
    # aligned with the data for time steps other than 30 minutes
    start = datetime.datetime(current_year, 1, 1, 0, 0, 0) + step
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array(list(qcutils.perdelta(start, end, step)))
    mt = numpy.ones(len(cdt)) * float(-9999)
    # one column for the timestamp plus one -9999 filled column per series
    data = numpy.stack([cdt] + [mt] * len(labels), axis=-1)
    # restrict the input data to the current year and find where its
    # timestamps fall in the full-year series
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    for n, label in enumerate(labels):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
    # replace the datetime objects with integer timestamps (YYYYMMDDHHMM)
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data
# Fragment: reduce the EVI array to one value per entry along its first axis,
# reject entries with too much spread and fit a spline through what is left.
# evi.reshape(evi_qc.shape[0], -1) flattens every axis after the first, so
# each statistic below is taken over all remaining elements of an entry.
# NOTE(review): the first axis is presumably time and the rest pixels - confirm.
# evi_median is not used again in this fragment.
evi_median = numpy.ma.median(evi.reshape(evi_qc.shape[0], -1), axis=1)
evi_mean = numpy.ma.mean(evi.reshape(evi_qc.shape[0], -1), axis=1)
evi_sd = numpy.ma.std(evi.reshape(evi_qc.shape[0], -1), axis=1)
# mask the mean and the standard deviation wherever the standard deviation
# exceeds evi_sd_threshold
evi_mean = numpy.ma.masked_where(evi_sd > evi_sd_threshold, evi_mean)
evi_sd = numpy.ma.masked_where(evi_sd > evi_sd_threshold, evi_sd)
# strip out masked elements, convert to ndarray from masked array and get various
# time series to do the interpolation
# idx holds the positions of the unmasked means
idx = numpy.where(numpy.ma.getmaskarray(evi_mean) == False)[0]
evi_raw = numpy.array(evi_mean[idx])
modis_time_raw = numpy.array(modis_time[idx])
modis_dt_raw = netCDF4.num2date(modis_time_raw, modis_time_units)
# regular series at 16 day spacing between the first and last unmasked times
# NOTE(review): 16 days is presumably the MODIS compositing interval - confirm
start_date = modis_dt_raw[0]
end_date = modis_dt_raw[-1]
tdts = datetime.timedelta(days=16)
modis_dt_interp = [ result for result in qcutils.perdelta(start_date, end_date, tdts) ]
modis_time_interp = netCDF4.date2num(modis_dt_interp, modis_time_units)
# choose the spline degree from evi_interpolate (1 = linear, 2 = quadratic,
# 3 = cubic); an unrecognised option prints a warning and falls back to linear
if evi_interpolate.lower() == "linear":
    k = 1
elif evi_interpolate.lower() == "quadratic":
    k = 2
elif evi_interpolate.lower() == "cubic":
    k = 3
else:
    print "Unrecognised interpolation option, using linear ..."
    k = 1
# spline of degree k through the unmasked (time, EVI mean) points
s = scipy.interpolate.InterpolatedUnivariateSpline(modis_time_raw, evi_raw, k=k)
ds_erai.globalattributes["nc_level"] = "L1" # get the UTC and local datetime series site_tz = pytz.timezone(site_timezone) # now we get the datetime series at the tower time step tdts = datetime.timedelta(minutes=site_timestep) # get the start and end datetimes rounded to the nearest time steps # that lie between the first and last times start_date = qcutils.rounddttots(dt_erai_utc_cor[0], ts=site_timestep) if start_date < dt_erai_utc_cor[0]: start_date = start_date + tdts end_date = qcutils.rounddttots(dt_erai_utc_cor[-1], ts=site_timestep) if end_date > dt_erai_utc_cor[-1]: end_date = end_date - tdts print " Got data from ", start_date, " UTC to ", end_date, " UTC" #print site_name,end_date,dt_erai_utc_cor[-1] # UTC datetime series at the tower time step dt_erai_utc_tts = [ x for x in qcutils.perdelta(start_date, end_date, tdts) ] # UTC netCDF time series at tower time step for interpolation tmp = [x.replace(tzinfo=None) for x in dt_erai_utc_tts] erai_time_tts = netCDF4.date2num(tmp, time_units) # local datetime series at tower time step dt_erai_loc_tts = [x.astimezone(site_tz) for x in dt_erai_utc_tts] # NOTE: will have to disable daylight saving at some stage, towers stay on Standard Time # PRI hopes that the following line will do this ... dt_erai_loc_tts = [x - x.dst() for x in dt_erai_loc_tts] # make the datetime series timezone naive and put it in data structure dt_erai_loc_tts = [x.replace(tzinfo=None) for x in dt_erai_loc_tts] ds_erai.series["DateTime"]["Data"] = dt_erai_loc_tts ds_erai.globalattributes["nc_nrecs"] = len(dt_erai_loc_tts) ds_erai.globalattributes["start_datetime"] = str(dt_erai_loc_tts[0]) ds_erai.globalattributes["end_datetime"] = str(dt_erai_loc_tts[-1])
#print bom_id,":",ldtn[0],ldtn[-1] start_date = min([start_date,ldtn[0]]) end_date = max([end_date,ldtn[-1]]) #print start_date,end_date # merge the individual data structures into a single one log.info("Merging file contents") ds_all = qcio.DataStructure() ds_all.globalattributes["time_step"] = 30 ds_all.globalattributes["xl_datemode"] = 0 ds_all.globalattributes["site_name"] = site_name ds_all.globalattributes["latitude"] = site_latitude ds_all.globalattributes["longitude"] = site_longitude ds_all.globalattributes["elevation"] = site_elevation ts = int(ds_all.globalattributes["time_step"]) ldt_all = [result for result in qcutils.perdelta(start_date,end_date,datetime.timedelta(minutes=ts))] nRecs = len(ldt_all) ds_all.globalattributes["nc_nrecs"] = nRecs ds_all.series["DateTime"] = {} ds_all.series["DateTime"]["Data"] = ldt_all flag = numpy.zeros(nRecs,dtype=numpy.int32) ds_all.series["DateTime"]["Flag"] = flag ds_all.series["DateTime"]["Attr"] = {} ds_all.series['DateTime']["Attr"]["long_name"] = "Date-time object" ds_all.series['DateTime']["Attr"]["units"] = "None" # get the year, month, day, hour, minute and seconds from the Python datetime qcutils.get_ymdhmsfromdatetime(ds_all) # get the xlDateTime from the xlDateTime = qcutils.get_xldatefromdatetime(ds_all) attr = qcutils.MakeAttributeDictionary(long_name="Date/time in Excel format",units="days since 1899-12-31 00:00:00") qcutils.CreateSeries(ds_all,"xlDateTime",xlDateTime,Flag=flag,Attr=attr)
# get the mean and standard deviation of the QC'd pixels evi_median = numpy.ma.median(evi.reshape(evi_qc.shape[0],-1),axis=1) evi_mean = numpy.ma.mean(evi.reshape(evi_qc.shape[0],-1),axis=1) evi_sd = numpy.ma.std(evi.reshape(evi_qc.shape[0],-1),axis=1) evi_mean = numpy.ma.masked_where(evi_sd>evi_sd_threshold,evi_mean) evi_sd = numpy.ma.masked_where(evi_sd>evi_sd_threshold,evi_sd) # strip out masked elements, convert to ndarray from masked array and get various # time series to do the interpolation idx = numpy.where(numpy.ma.getmaskarray(evi_mean)==False)[0] evi_raw = numpy.array(evi_mean[idx]) modis_time_raw = numpy.array(modis_time[idx]) modis_dt_raw = netCDF4.num2date(modis_time_raw,modis_time_units) start_date = modis_dt_raw[0] end_date = modis_dt_raw[-1] tdts = datetime.timedelta(days=16) modis_dt_interp = [result for result in qcutils.perdelta(start_date,end_date,tdts)] modis_time_interp = netCDF4.date2num(modis_dt_interp,modis_time_units) # fill the missing MODIS data with linear interpolation if evi_interpolate.lower()=="linear": k = 1 elif evi_interpolate.lower()=="quadratic": k = 2 elif evi_interpolate.lower()=="cubic": k = 3 else: print "Unrecognised interpolation option, using linear ..." k = 1 s = scipy.interpolate.InterpolatedUnivariateSpline(modis_time_raw,evi_raw,k=k) evi_interp = s(modis_time) # apply the Savitsky-Golay smoothing filter if requested if evi_smooth_filter.lower()=="savitsky-golay":