def interpolate_to_30minutes(ds_60minutes):
    """
    Purpose:
     Build a 30 minute data structure from a 60 minute data structure by
     interpolating every non-datetime series onto a half-step index.
    Usage:
     ds_30minutes = interpolate_to_30minutes(ds_60minutes)
    Notes:
     A 30 minute value is set to c.missing_value (and flagged 1) whenever
     either of the 60 minute values it was interpolated from was missing.
    """
    half_hour = datetime.timedelta(minutes=30)
    date_time_labels = ["DateTime", "DateTime_UTC", "xlDateTime", "Year", "Month", "Day",
                        "Hour", "Minute", "Second", "Hdh", "Hr_UTC"]
    ds_30minutes = qcio.DataStructure()
    # carry the global attributes across, then overwrite the time step
    for name in list(ds_60minutes.globalattributes.keys()):
        ds_30minutes.globalattributes[name] = ds_60minutes.globalattributes[name]
    ds_30minutes.globalattributes["time_step"] = 30
    # 30 minute local and UTC datetimes spanning the 60 minute series
    local_60 = ds_60minutes.series["DateTime"]["Data"]
    local_30 = list(perdelta(local_60[0], local_60[-1], half_hour))
    utc_60 = ds_60minutes.series["DateTime_UTC"]["Data"]
    utc_30 = list(perdelta(utc_60[0], utc_60[-1], half_hour))
    # the record count follows the local datetime series
    ds_30minutes.globalattributes['nc_nrecs'] = len(local_30)
    ds_30minutes.series["DateTime"] = {
        "Data": local_30,
        "Flag": numpy.zeros(len(local_30), dtype=numpy.int32),
    }
    ds_30minutes.series["DateTime_UTC"] = {
        "Data": utc_30,
        "Flag": numpy.zeros(len(utc_30), dtype=numpy.int32),
    }
    # derive the Excel datetime and the year, month etc from the datetime
    qcutils.get_xldatefromdatetime(ds_30minutes)
    qcutils.get_ymdhmsfromdatetime(ds_30minutes)
    # index axes: whole numbers for the hourly data, halves for the output
    n_60 = len(ds_60minutes.series["DateTime"]["Data"])
    n_30 = len(ds_30minutes.series["DateTime"]["Data"])
    index_60 = numpy.arange(0, n_60, 1)
    index_30 = numpy.arange(0, n_60-0.5, 0.5)
    # everything except the date and time variables gets interpolated
    # (its OK to interpolate accumulated precipitation)
    labels = [name for name in ds_60minutes.series.keys() if name not in date_time_labels]
    for label in labels:
        hourly, _, attr = qcutils.GetSeries(ds_60minutes, label)
        # indicator series: 1 where the hourly value is missing, 0 elsewhere
        missing_60 = numpy.zeros(len(hourly))
        missing_60[numpy.where(abs(hourly-float(c.missing_value)) < c.eps)[0]] = float(1)
        half_hourly = interp1d(index_60, hourly)(index_30)
        # interpolating the indicator spreads "missing" onto the neighbours
        missing_30 = interp1d(index_60, missing_60)(index_30)
        bad = numpy.where(abs(missing_30-float(0)) > c.eps)[0]
        half_hourly[bad] = numpy.float64(c.missing_value)
        flag_30 = numpy.zeros(n_30, dtype=numpy.int32)
        flag_30[bad] = numpy.int32(1)
        qcutils.CreateSeries(ds_30minutes, label, half_hourly, Flag=flag_30, Attr=attr)
    # decimal UTC hour of every 30 minute record
    hr_utc = [float(stamp.hour)+float(stamp.minute)/60 for stamp in utc_30]
    hr_attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    hr_flag = numpy.zeros(n_30, dtype=numpy.int32)
    qcutils.CreateSeries(ds_30minutes, 'Hr_UTC', hr_utc, Flag=hr_flag, Attr=hr_attr)
    return ds_30minutes
def interpolate_ds(ds_in, ts):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
     where ds_in is the input data structure
           ts is the output time step in minutes
    Side effects:
     Returns a new data structure holding Ta, Td, ps, RH, Ah and q on the
     new time step, plus whatever interpolate_wswd() and
     interpolate_precip() add to it.
    Author: PRI
    Date: June 2017
    """
    logger.info("Interpolating data")
    step = datetime.timedelta(minutes=ts)
    # output data structure with a copy of the input global attributes
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    ds_out.globalattributes["time_step"] = str(ts)
    # regular output times; the rounded end points are pulled back inside
    # the period covered by the input data
    dt = ds_in.series["DateTime"]["Data"]
    first = qcutils.rounddttots(dt[0], ts=ts)
    if first < dt[0]:
        first = first + step
    last = qcutils.rounddttots(dt[-1], ts=ts)
    if last > dt[-1]:
        last = last - step
    idt = list(qcutils.perdelta(first, last, step))
    nrecs = len(idt)
    # numeric time axes of the input (x1) and output (x2) series
    x1 = numpy.array([toTimestamp(stamp) for stamp in dt])
    x2 = numpy.array([toTimestamp(stamp) for stamp in idt])
    # write the output datetime and the time variable derived from it
    ldt_var = {"Label": "DateTime",
               "Data": idt,
               "Flag": numpy.zeros(nrecs, dtype=numpy.int32),
               "Attr": {"long_name": "Datetime", "units": "none"}}
    qcutils.CreateVariable(ds_out, ldt_var)
    qcutils.get_nctime_from_datetime(ds_out)
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # first, the temperatures, pressure and humidities
    f0 = numpy.zeros(nrecs, dtype=numpy.int32)
    f1 = numpy.ones(nrecs, dtype=numpy.int32)
    for label in ["Ta", "Td", "ps", "RH", "Ah", "q"]:
        var_out = qcutils.create_empty_variable(label, nrecs, datetime=idt)
        var_in = qcutils.GetVariable(ds_in, label)
        interpolated = interpolate_1d(x1, var_in["Data"], x2)
        var_out["Data"] = interpolated
        # flag is 1 wherever the interpolated value is masked
        var_out["Flag"] = numpy.where(numpy.ma.getmaskarray(interpolated), f1, f0)
        var_out["Attr"] = copy.deepcopy(var_in["Attr"])
        qcutils.CreateVariable(ds_out, var_out)
    # now clamp the dew point so that Td <= Ta
    Ta = qcutils.GetVariable(ds_out, "Ta")
    Td = qcutils.GetVariable(ds_out, "Td")
    Td["Data"] = numpy.ma.where(Td["Data"] <= Ta["Data"], Td["Data"], Ta["Data"])
    qcutils.CreateVariable(ds_out, Td)
    # wind speed and direction are handled via the U and V components
    interpolate_wswd(ds_in, x1, ds_out, x2)
    # and lastly, precipitation
    interpolate_precip(ds_in, x1, ds_out, x2)
    return ds_out
def l1qc(cf):
    """
    Purpose:
     Read the input file named in the control file (CSV or Excel, chosen by
     the file extension) and run the L1 processing steps on it.
    Usage:
     ds1 = l1qc(cf)
     where cf is a control file object
    Side effects:
     When the input file is missing or the reader reports a non-zero return
     code, the data structure is returned straight away and
     ds1.returncodes describes the problem.
    """
    in_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(in_filename, mode="quiet"):
        msg = "".join([" Input file ", in_filename, " not found ..."])
        logger.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    # choose the reader and the routine that derives the other datetime form
    extension = os.path.splitext(in_filename)[1]
    if "csv" in extension.lower():
        # CSV gives Python datetimes, so derive the Excel datetime
        read_series = qcio.csv_read_series
        derive_datetime = qcutils.get_xldatefromdatetime
    else:
        # Excel gives Excel datetimes, so derive the Python datetime
        read_series = qcio.xl_read_series
        derive_datetime = qcutils.get_datetimefromxldate
    ds1 = read_series(cf)
    if ds1.returncodes["value"] != 0:
        return ds1
    derive_datetime(ds1)
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf, ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1, mode="nearest_second")
    # check for gaps in the Python datetime series and fix if present
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf, ["options"], "FixTimeStepMethod",
                                                   default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime and the Year, Month, Day etc
    qcutils.get_xldatefromdatetime(ds1)
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # take the start and end dates from the datetime series unless they were
    # given in the control file
    for attr_name, index in (('start_date', 0), ('end_date', -1)):
        if attr_name not in ds1.globalattributes.keys():
            ds1.globalattributes[attr_name] = str(ds1.series['DateTime']['Data'][index])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
    # check missing data and QC flags are consistent
    qcutils.CheckQCFlags(ds1)
    return ds1
def l1qc_read_files(cf):
    """
    Purpose:
     Read the input file named in the control file (CSV or Excel, chosen by
     the file extension) and make sure both the Python and the Excel
     datetime are present.
    Usage:
     ds1 = l1qc_read_files(cf)
     where cf is a control file object
    Side effects:
     When the input file is missing or the reader reports a non-zero return
     code, the data structure is returned straight away and
     ds1.returncodes describes the problem.
    """
    in_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(in_filename, mode="quiet"):
        msg = "".join([" Input file ", in_filename, " not found ..."])
        log.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    extension = os.path.splitext(in_filename)[1]
    if "csv" in extension.lower():
        # CSV gives Python datetimes, so derive the Excel datetime
        read_series = qcio.csv_read_series
        derive_datetime = qcutils.get_xldatefromdatetime
    else:
        # Excel gives Excel datetimes, so derive the Python datetime
        read_series = qcio.xl_read_series
        derive_datetime = qcutils.get_datetimefromxldate
    ds1 = read_series(cf)
    if ds1.returncodes["value"] == 0:
        derive_datetime(ds1)
    return ds1
ax1.plot(modis_dt, evi_qc[:, i, j], 'r+') ax1.errorbar(modis_dt, evi_mean, yerr=evi_sd, fmt='ro') ax1.plot(modis_dt, evi_interp, 'g^') ax1.plot(modis_dt, evi_interp, 'g--') ax1.plot(modis_dt, evi_interp_smooth, 'y-') #for item in fire_dates: plt.axvline(item) ax2 = plt.subplot(212, sharex=ax1) ax2.errorbar(modis_dt, evi_mean, yerr=evi_sd, fmt='ro') ax2.plot(dt_UTC, evi_interp2_smooth, 'b-') png_filename = out_name.replace(".nc", ".png") fig.savefig(png_filename, format="png") plt.draw() plt.ioff() # create a data structure and write the global attributes ds = qcio.DataStructure() ds.series["DateTime"] = {} ds.globalattributes["site_name"] = site_name ds.globalattributes["time_zone"] = site_timezone ds.globalattributes["longitude"] = site_longitude ds.globalattributes["latitude"] = site_latitude ds.globalattributes["time_step"] = site_timestep ds.globalattributes["xl_datemode"] = str(0) ds.globalattributes["nc_level"] = "L1" # convert from UTC to local time site_tz = pytz.timezone(site_timezone) # put the time zone (UTC) into the datetime dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_UTC] # convert from UTC to local time dt_loc = [x.astimezone(site_tz) for x in dt_utc] # remove any daylight saving adjustments (towers run on standard time)
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a
     data structure.
    Assumptions:
     Records are fixed width; every field is taken from the character
     columns hard-coded below.
     NOTE(review): the last line of the file is never decoded (the loop
     stops one line short) - confirm this is intended.
     NOTE(review): gzip files are opened in binary mode; under Python 3 the
     lines are then bytes and will never match the str observation codes -
     confirm the target Python version.
    Usage:
     ds = read_isd_file(isd_file_path)
     where isd_file_path is the path of the ISD file; the file name must
           contain at least one "-" because the site ID is built from the
           first two "-" separated parts.
    Side effects:
     Returns a data structure containing Wd, Ws, Ta, Td, ps, Precip, RH,
     Ah, q and the wind components.  Values that are absent or cannot be
     decoded are masked and flagged with 1.
    Author: PRI
    Date: June 2017
    """
    def decode(record, start, stop, scale, missing):
        # decode one fixed-width numeric field; a field that is absent or is
        # not a number becomes the missing value for that variable
        try:
            return float(record[start:stop])/scale
        except ValueError:
            return float(missing)

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file "+isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0]+"-"+isd_site_id[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude from the first record
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34])/float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41])/float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the staging dictionary
    isd = {}
    isd["DateTime"] = {"Data":[],"Flag":[],"Attr":{"long_name":"Datetime","units":"none"}}
    isd["Wd"] = {"Data":[],"Attr":{"long_name":"Wind direction","units":"degrees","missing_value":999}}
    isd["Ws"] = {"Data":[],"Attr":{"long_name":"Wind speed","units":"m/s","missing_value":999.9}}
    isd["Ta"] = {"Data":[],"Attr":{"long_name":"Air temperature","units":"C","missing_value":999.9}}
    isd["Td"] = {"Data":[],"Attr":{"long_name":"Dew point temperature","units":"C","missing_value":999.9}}
    isd["ps"] = {"Data":[],"Attr":{"long_name":"Surface pressure","units":"kPa","missing_value":9999.9}}
    isd["Precip"] = {"Data":[],"Attr":{"long_name":"Precipitation","units":"mm","missing_value":999.9}}
    # (label, first column, end column, divisor) of the always-present fields:
    # wind direction (degT), wind speed (m/s), air and dew point temperature
    # (C) and sea level pressure (hPa)
    numeric_fields = [("Wd", 60, 63, float(1)),
                      ("Ws", 65, 69, float(10)),
                      ("Ta", 87, 92, float(10)),
                      ("Td", 93, 98, float(10)),
                      ("ps", 99, 104, float(10))]
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ","CRN05","CRN15","FM-12","FM-15","FM-16","SY-MT"]
    # iterate over the lines in the file (all but the last) and decode the data
    for record in content[:-1]:
        # filter out anything other than hourly data
        if record[41:46] not in OK_obs_code:
            continue
        # the datetime is stored naive; the time zone goes in the attributes
        dt = datetime.datetime(int(record[15:19]), int(record[19:21]), int(record[21:23]),
                               int(record[23:25]), int(record[25:27]), 0)
        isd["DateTime"]["Data"].append(dt)
        for label, start, stop, scale in numeric_fields:
            missing = isd[label]["Attr"]["missing_value"]
            isd[label]["Data"].append(decode(record, start, stop, scale, missing))
        # precipitation, mm, only present when the "AA1" section follows
        if record[108:111] == "AA1":
            isd["Precip"]["Data"].append(decode(record, 113, 117, float(10), 999.9))
        else:
            isd["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    isd["DateTime"]["Attr"]["time_zone"] = "UTC"
    # get the number of records and add this to the global attributes
    nrecs = len(isd["DateTime"]["Data"])
    ds.globalattributes["nc_nrecs"] = str(nrecs)
    # define the QC flags
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # deal with the datetime first
    variable = {"Label":"DateTime", "Data":numpy.array(isd["DateTime"]["Data"]),
                "Flag":f0, "Attr":isd["DateTime"]["Attr"]}
    qcutils.CreateVariable(ds, variable)
    # get the nominal time step; scipy.stats.mode returns a length 1 array in
    # older SciPy releases and a scalar in newer ones, so accept both
    dt_delta = qcutils.get_timestep(ds)
    ts = numpy.atleast_1d(scipy.stats.mode(dt_delta)[0])[0]/60
    ds.globalattributes["time_step"] = ts
    # add the variables to the data structure
    logger.info("Writing data to the data structure")
    labels = [label for label in isd.keys() if label != "DateTime"]
    for label in labels:
        attr = isd[label]["Attr"]
        data = numpy.ma.masked_equal(isd[label]["Data"], attr["missing_value"])
        flag = numpy.where(numpy.ma.getmaskarray(data), f1, f0)
        variable = {"Label":label, "Data":data, "Flag":flag, "Attr":attr}
        qcutils.CreateVariable(ds, variable)
    # hPa to kPa
    ps = qcutils.GetVariable(ds, "ps")
    ps["Data"] = ps["Data"]/float(10)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    Ta = qcutils.GetVariable(ds, "Ta")
    cfac = numpy.ma.exp((-1*site_altitude)/((Ta["Data"]+273.15)*29.263))
    ps["Data"] = ps["Data"]*cfac
    ps["Attr"]["long_name"] = ps["Attr"]["long_name"]+", adjusted from sea level to station"
    qcutils.CreateVariable(ds, ps)
    # do precipitation and apply crude limits
    Precip = qcutils.GetVariable(ds, "Precip")
    condition = (Precip["Data"]<0)|(Precip["Data"]>100)
    Precip["Data"] = numpy.ma.masked_where(condition, Precip["Data"])
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"]), f1, f0)
    Precip["Attr"]["RangeCheck_upper"] = 100
    Precip["Attr"]["RangeCheck_lower"] = 0
    qcutils.CreateVariable(ds, Precip)
    # get the humidities from Td
    Ta = qcutils.GetVariable(ds, "Ta")
    Td = qcutils.GetVariable(ds, "Td")
    ps = qcutils.GetVariable(ds, "ps")
    RH = mf.RHfromdewpoint(Td["Data"], Ta["Data"])
    Ah = mf.absolutehumidityfromRH(Ta["Data"], RH)
    q = mf.specifichumidityfromRH(RH, Ta["Data"], ps["Data"])
    humidities = [("RH", RH, {"long_name":"Relative humidity", "units":"%"}),
                  ("Ah", Ah, {"long_name":"Absolute humidity", "units":"g/m3"}),
                  ("q", q, {"long_name":"Specific humidity", "units":"kg/kg"})]
    for label, data, attr in humidities:
        flag = numpy.where(numpy.ma.getmaskarray(data), f1, f0)
        variable = {"Label":label, "Data":data, "Flag":flag, "Attr":attr}
        qcutils.CreateVariable(ds, variable)
    # get U and V components from wind speed and direction
    Ws = qcutils.GetVariable(ds, "Ws")
    Wd = qcutils.GetVariable(ds, "Wd")
    U, V = qcutils.convert_WSWDtoUV(Ws, Wd)
    qcutils.CreateVariable(ds, U)
    qcutils.CreateVariable(ds, V)
    # add the time variable
    qcutils.get_nctime_from_datetime(ds)
    # return the data
    return ds
def average_duplicate_times(ds_in, time_step):
    """
    Purpose:
     Remove duplicate time steps by averaging data with the same time stamp.
     The routine uses scipy.stats.binned_statistic() to bin the data based on
     the time.  Bins have width time_step and their edges lie on integral
     multiples of time_step.
     Variables whose label starts with "P" and whose units are "m" or "mm"
     are summed within each bin; everything else is averaged.
    Usage:
     ds_out = average_duplicate_times(ds_in, time_step=30)
     where ds_in is a data structure containing a "time" variable
           time_step is the bin width in minutes
    Side effects:
     The time given for the averages and sums is the end of the time period.
     Wind speed and direction in the output are recalculated from the
     binned "u" and "v" components.
     NOTE(review): the finite-value test only masks a precipitation bin if
     its sum is non-finite - confirm whether empty bins are meant to be
     masked for summed variables too.
    Author: PRI
    Date: October 2017
    """
    logger.info("Getting data onto a regular time step")
    # get the time as a number (see attr["units"] for units)
    time_var = qcutils.GetVariable(ds_in, "time")
    # bin width in the units of the time variable (time_step is in minutes)
    bin_width = time_step*60
    # round the ISD start time down to an integral of the time step
    t0 = bin_width*int(time_var["Data"].data[0]/bin_width)
    bin_first = t0 - bin_width
    # round the ISD end time down to an integral of the time step
    t1 = bin_width*int(time_var["Data"].data[-1]/bin_width)
    # make sure we go 1 beyond the end time
    if t1 < time_var["Data"][-1]:
        t1 = t1 + bin_width
    # generate an array of bin edges
    bin_last = t1 + bin_width
    bins = numpy.arange(bin_first, bin_last, bin_width)
    # get the number of records in the output series
    nrecs = len(bins)-1
    # generate series of zeros and ones to be used as QC flags
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # create an output data structure with a copy of the input global attributes
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    # update the number of records
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # every variable except the datetime and time is binned
    # NB: Wind velocity components U and V are averaged here and wind speed
    #     and direction are recalculated from them at the end.
    labels = [label for label in ds_in.series.keys() if label not in ["DateTime", "time"]]
    for label in labels:
        var_in = qcutils.GetVariable(ds_in, label)
        # indices of non-masked elements
        idx = numpy.where(~numpy.ma.getmaskarray(var_in["Data"]))[0]
        if len(idx) == 0:
            # no data, so create an empty output variable
            var_out = {"Label":label, "Data":numpy.ma.masked_all(nrecs),
                       "Flag":f1, "Attr":var_in["Attr"]}
        else:
            # get the non-masked data as an ndarray
            data_in = numpy.array(var_in["Data"][idx].data)
            time_in = numpy.array(time_var["Data"][idx].data)
            # sum precipitation, average everything else
            is_precip = var_in["Label"][0:1] == "P" and var_in["Attr"]["units"] in ["m", "mm"]
            statistic = "sum" if is_precip else "mean"
            binned = scipy.stats.binned_statistic(time_in, data_in,
                                                  statistic=statistic, bins=bins)[0]
            # convert output to a masked array and mask non-finite bins
            data_out = numpy.ma.masked_where(~numpy.isfinite(binned), numpy.ma.array(binned))
            # generate the QC flag
            flag_out = numpy.where(numpy.ma.getmaskarray(data_out), f1, f0)
            var_out = {"Label":label, "Data":data_out, "Flag":flag_out, "Attr":var_in["Attr"]}
        # and write the output variable to the output data structure
        qcutils.CreateVariable(ds_out, var_out)
    # the output time stamps are the right hand edges of the bins; take them
    # from bins (as floats, which is what binned_statistic() hands back) so
    # this works even when no variable contained any data
    bin_ends = numpy.asarray(bins, dtype=float)[1:]
    # and convert these to a series of Python datetimes
    attr = copy.deepcopy(ds_in.series["DateTime"]["Attr"])
    ldt_out = {"Label":"DateTime", "Data":netCDF4.num2date(bin_ends, time_var["Attr"]["units"]),
               "Flag":f0, "Attr":attr}
    # and write the datetime to the output data structure
    qcutils.CreateVariable(ds_out, ldt_out)
    qcutils.get_nctime_from_datetime(ds_out)
    # get wind speed and direction from components
    U = qcutils.GetVariable(ds_out, "u")
    V = qcutils.GetVariable(ds_out, "v")
    WS, WD = qcutils.convert_UVtoWSWD(U, V)
    qcutils.CreateVariable(ds_out, WS)
    qcutils.CreateVariable(ds_out, WD)
    return ds_out
# put the data for this site into the all sites data structure ds_out[site_index[isd_site]] = copy.deepcopy(ds_mlg) # add some useful global attributes ds_out[site_index[isd_site]].globalattributes["isd_site_id"] = isd_site ds_out[site_index[isd_site]].globalattributes["time_zone"] = time_zone # write out a netCDF file for each ISD site and each year #nc_file_name = isd_site+"_"+str(year)+".nc" #nc_dir_path = os.path.join(out_base_path,site,"Data","ISD") #if not os.path.exists(nc_dir_path): #os.makedirs(nc_dir_path) #nc_file_path = os.path.join(nc_dir_path,nc_file_name) #nc_file = qcio.nc_open_write(nc_file_path) #qcio.nc_write_series(nc_file, ds_out[site_index[isd_site]], ndims=1) # now we merge the data structures for each ISD station into a single data structure # first, instance a data structure ds_all = qcio.DataStructure() ds_all.globalattributes["latitude"] = site_info[site]["Latitude"] ds_all.globalattributes["longitude"] = site_info[site]["Longitude"] ds_all.globalattributes["altitude"] = site_info[site]["Altitude"] # now loop over the data structures for each ISD station and get the earliest # start time and the latest end time start_datetime = [] end_datetime = [] for i in list(ds_out.keys()): start_datetime.append(ds_out[i].series["DateTime"]["Data"][0]) end_datetime.append(ds_out[i].series["DateTime"]["Data"][-1]) print site, year start = min(start_datetime) end = max(end_datetime) # now make a datetime series at the required time step from the earliest start # datetime to the latest end datetime
"site_sa_limit", default=5) # index of the site in latitude dimension site_lat_index = int(((latitude[0] - site_latitude) / lat_resolution) + 0.5) erai_latitude = latitude[site_lat_index] # index of the site in longitude dimension if site_longitude < 0: site_longitude = float(360) + site_longitude site_lon_index = int(( (site_longitude - longitude[0]) / lon_resolution) + 0.5) erai_longitude = longitude[site_lon_index] print " Site coordinates: ", site_latitude, site_longitude print " ERAI grid: ", latitude[site_lat_index], longitude[ site_lon_index] # get an instance of the Datastructure ds_erai = qcio.DataStructure() ds_erai.series["DateTime"] = {} ds_erai.globalattributes["site_name"] = site_name ds_erai.globalattributes["time_zone"] = site_timezone ds_erai.globalattributes["latitude"] = site_latitude ds_erai.globalattributes["longitude"] = site_longitude ds_erai.globalattributes["time_step"] = site_timestep ds_erai.globalattributes["sa_limit"] = site_sa_limit ds_erai.globalattributes['xl_datemode'] = str(0) ds_erai.globalattributes["nc_level"] = "L1" # get the UTC and local datetime series site_tz = pytz.timezone(site_timezone) # now we get the datetime series at the tower time step tdts = datetime.timedelta(minutes=site_timestep) # get the start and end datetimes rounded to the nearest time steps # that lie between the first and last times
cf_name = qcio.get_controlfilename(path='../controlfiles',title='Choose a control file') # get the control file contents logging.info('Reading the control file') cf = configobj.ConfigObj(cf_name) # get stuff from the control file logging.info('Getting control file contents') site_list = list(cf["Sites"].keys()) var_list = list(cf["Variables"].keys()) # loop over sites #site_list = ["AdelaideRiver"] for site in site_list: info = get_info_dict(cf,site) logging.info("Processing site "+info["site_name"]) # instance the data structures logging.info('Creating the data structures') ds_60minutes = qcio.DataStructure() # get a sorted list of files that match the mask in the control file file_list = sorted(glob.glob(info["in_filename"])) # read the netcdf files logging.info('Reading the netCDF files for '+info["site_name"]) f = access_read_mfiles2(file_list,var_list=var_list) # get the data from the netCDF files and write it to the 60 minute data structure logging.info('Getting the ACCESS data') get_accessdata(cf,ds_60minutes,f,info) # set some global attributes logging.info('Setting global attributes') set_globalattributes(ds_60minutes,info) # check for time gaps in the file logging.info("Checking for time gaps") if qcutils.CheckTimeStep(ds_60minutes): qcutils.FixTimeStep(ds_60minutes)
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a
     data structure.
    Assumptions:
     Records are fixed width; every field is taken from the character
     columns hard-coded below.
     NOTE(review): the last line of the file is never decoded (the loop
     stops one line short) - confirm this is intended.
     NOTE(review): gzip files are opened in binary mode; under Python 3 the
     lines are then bytes and will never match the str observation codes -
     confirm the target Python version.
    Usage:
     ds = read_isd_file(isd_file_path)
     where isd_file_path is the path of the ISD file; the file name must
           contain at least one "-" because the site ID is built from the
           first two "-" separated parts.
    Side effects:
     Returns a data structure containing Wd, Ws, Ta, Td, ps, Precip, RH, Ah
     and q with UTC-aware datetimes.  Values that are absent or cannot be
     decoded are masked and flagged with 1.
    Author: PRI
    Date: June 2017
    """
    def decode(record, start, stop, scale, missing):
        # decode one fixed-width numeric field; a field that is absent or is
        # not a number becomes the missing value for that variable
        try:
            return float(record[start:stop]) / scale
        except ValueError:
            return float(missing)

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0] + "-" + isd_site_id[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude from the first record
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34]) / float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41]) / float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    series_attributes = [("DateTime", "Datetime", "none"),
                         ("Wd", "Wind direction", "degrees"),
                         ("Ws", "Wind speed", "m/s"),
                         ("Ta", "Air temperature", "C"),
                         ("Td", "Dew point temperature", "C"),
                         ("ps", "Surface pressure", "kPa"),
                         ("Precip", "Precipitation", "mm")]
    for label, long_name, units in series_attributes:
        ds.series[label] = {"Data": [], "Flag": [],
                            "Attr": {"long_name": long_name, "units": units}}
    # (label, first column, end column, divisor, missing value) of the
    # always-present fields: wind direction (degT), wind speed (m/s), air and
    # dew point temperature (C) and sea level pressure (hPa)
    numeric_fields = [("Wd", 60, 63, float(1), 999),
                      ("Ws", 65, 69, float(10), 999.9),
                      ("Ta", 87, 92, float(10), 999.9),
                      ("Td", 93, 98, float(10), 999.9),
                      ("ps", 99, 104, float(10), 9999.9)]
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"]
    # iterate over the lines in the file (all but the last) and decode the data
    for record in content[:-1]:
        # filter out anything other than hourly data
        if record[41:46] not in OK_obs_code:
            continue
        dt = datetime.datetime(int(record[15:19]), int(record[19:21]), int(record[21:23]),
                               int(record[23:25]), int(record[25:27]), 0)
        ds.series["DateTime"]["Data"].append(pytz.utc.localize(dt))
        for label, start, stop, scale, missing in numeric_fields:
            ds.series[label]["Data"].append(decode(record, start, stop, scale, missing))
        # precipitation, mm, only present when the "AA1" section follows
        if record[108:111] == "AA1":
            ds.series["Precip"]["Data"].append(decode(record, 113, 117, float(10), 999.9))
        else:
            ds.series["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    ds.series["DateTime"]["Attr"]["time_zone"] = "UTC"
    # QC flag templates, one entry per record
    nrecs = len(ds.series["DateTime"]["Data"])
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # convert from lists to arrays, datetime first
    ds.series["DateTime"]["Data"] = numpy.array(ds.series["DateTime"]["Data"])
    ds.series["DateTime"]["Flag"] = f0
    ds.globalattributes["nc_nrecs"] = len(f0)
    # get the nominal time step; scipy.stats.mode returns a length 1 array in
    # older SciPy releases and a scalar in newer ones, so accept both
    dt_delta = qcutils.get_timestep(ds)
    ts = numpy.atleast_1d(scipy.stats.mode(dt_delta)[0])[0] / 60
    ds.globalattributes["time_step"] = ts
    # mask the missing values of the wind and temperature series and flag them
    for label, missing in [("Wd", 999), ("Ws", 999.9), ("Ta", 999.9), ("Td", 999.9)]:
        series = ds.series[label]
        series["Data"] = numpy.ma.masked_equal(series["Data"], missing)
        series["Flag"] = numpy.where(numpy.ma.getmaskarray(series["Data"]), f1, f0)
    # pressure: mask the missing values and convert hPa to kPa
    ps_series = ds.series["ps"]
    ps_series["Data"] = numpy.ma.masked_equal(ps_series["Data"], 9999.9) / float(10)
    ps_series["Flag"] = numpy.where(numpy.ma.getmaskarray(ps_series["Data"]), f1, f0)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    cfac = numpy.ma.exp((-1 * site_altitude) / ((ds.series["Ta"]["Data"] + 273.15) * 29.263))
    ps_series["Data"] = ps_series["Data"] * cfac
    # do precipitation and apply crude limits
    precip_series = ds.series["Precip"]
    precip_series["Data"] = numpy.ma.masked_equal(precip_series["Data"], 999.9)
    condition = (precip_series["Data"] < 0) | (precip_series["Data"] > 100)
    precip_series["Data"] = numpy.ma.masked_where(condition, precip_series["Data"])
    precip_series["Flag"] = numpy.where(numpy.ma.getmaskarray(precip_series["Data"]), f1, f0)
    # get the humidities from Td
    Ta, flag, attr = qcutils.GetSeriesasMA(ds, "Ta")
    Td, flag, attr = qcutils.GetSeriesasMA(ds, "Td")
    ps, flag, attr = qcutils.GetSeriesasMA(ds, "ps")
    RH = mf.RHfromdewpoint(Td, Ta)
    flag = numpy.where(numpy.ma.getmaskarray(RH), f1, f0)
    attr = {"long_name": "Relative humidity", "units": "%"}
    qcutils.CreateSeries(ds, "RH", RH, Flag=flag, Attr=attr)
    Ah = mf.absolutehumidityfromRH(Ta, RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah), f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m3"}
    qcutils.CreateSeries(ds, "Ah", Ah, Flag=flag, Attr=attr)
    q = mf.specifichumidityfromRH(RH, Ta, ps)
    flag = numpy.where(numpy.ma.getmaskarray(q), f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    qcutils.CreateSeries(ds, "q", q, Flag=flag, Attr=attr)
    # return the data
    return ds
def interpolate_ds(ds_in, ts, k=3):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
    Assumptions:
     NOTE(review): precipitation is copied, not interpolated, and the copy
     only works when every input time stamp occurs exactly once and lies on
     the output time grid; otherwise the assignment raises - confirm the
     inputs guarantee this.
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
     where ds_in is the input data structure
           ts is the output time step in minutes
           k is not used by this routine
    Author: PRI
    Date: June 2017
    """
    def on_the_hour(stamp):
        # same year, month, day and hour with minutes and seconds zeroed
        return datetime.datetime(stamp.year, stamp.month, stamp.day, stamp.hour, 0, 0)

    half_hour = datetime.timedelta(minutes=30)
    # instance the output data structure and copy the global attributes
    ds_out = qcio.DataStructure()
    for name in ds_in.globalattributes.keys():
        ds_out.globalattributes[name] = ds_in.globalattributes[name]
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # regular output times from the hour containing (first time - 30 minutes)
    # to the hour containing (last time + 30 minutes)
    dt = ds_in.series["DateTime"]["Data"]
    start = on_the_hour(dt[0] - half_hour)
    end = on_the_hour(dt[-1] + half_hour)
    idt = list(perdelta(start, end, datetime.timedelta(minutes=ts)))
    n_out = len(idt)
    # numeric time axes of the input (x1) and output (x2) series
    x1 = numpy.array([toTimestamp(stamp) for stamp in dt])
    x2 = numpy.array([toTimestamp(stamp) for stamp in idt])
    # the output datetime series
    ds_out.series["DateTime"] = {
        "Data": idt,
        "Flag": numpy.zeros(n_out),
        "Attr": {"long_name": "Datetime", "units": "none"},
    }
    ds_out.globalattributes["nc_nrecs"] = n_out
    # loop over the remaining series in the data structure
    series_list = [label for label in ds_in.series.keys() if label != "DateTime"]
    for label in series_list:
        data_in, _, attr_in = qcutils.GetSeriesasMA(ds_in, label)
        if "Precip" not in label:
            # interpolate everything that is not precipitation
            data_out = interpolate_1d(x1, data_in, x2)
        else:
            # precipitation shouldn't be interpolated, just assign any
            # precipitation to the ISD time stamp
            data_out = numpy.ma.zeros(n_out, dtype=numpy.float64)
            shared_times = numpy.intersect1d(x2, x1)
            data_out[numpy.searchsorted(x2, shared_times)] = data_in
        qcutils.CreateSeries(ds_out, label, data_out, Flag=numpy.zeros(n_out), Attr=attr_in)
    return ds_out
# Section: set up a 60 minute AWS data structure from the 30 minute one.
# NOTE(review): the enclosing definition is not visible here; si_wholehour,
# ei_wholehour, dt_aws_30minute and ds_aws_30minute come from earlier code.
# take the 30 minute datetimes from si_wholehour to ei_wholehour inclusive
dt_aws_30minute_array = numpy.array(dt_aws_30minute[si_wholehour:ei_wholehour + 1])
nRecs_30minute = len(dt_aws_30minute_array)
# pair up consecutive 30 minute datetimes, one pair per row
# NOTE(review): "/" gives a float under true division (Python 3 or
# "from __future__ import division"), which numpy.reshape rejects - confirm
# the target Python version or use "//".
dt_aws_2d = numpy.reshape(dt_aws_30minute_array, (nRecs_30minute / 2, 2))
# the 60 minute datetime is the second datetime of each pair
dt_aws_60minute = list(dt_aws_2d[:, 1])
nRecs_60minute = len(dt_aws_60minute)
# list of series labels with the date and time variables removed
series_list = list(ds_aws_30minute.series.keys())
for item in ["DateTime", "Ddd", "Day", "Minute", "xlDateTime", "Hour", "time", "Month",
             "Second", "Year"]:
    if item in series_list:
        series_list.remove(item)
# get the 60 minute data structure
ds_aws_60minute = qcio.DataStructure()
# get the global attributes
for item in list(ds_aws_30minute.globalattributes.keys()):
    ds_aws_60minute.globalattributes[item] = ds_aws_30minute.globalattributes[item]
# overwrite with 60 minute values as appropriate (both stored as strings)
ds_aws_60minute.globalattributes["nc_nrecs"] = str(nRecs_60minute)
ds_aws_60minute.globalattributes["time_step"] = str(60)
# put the Python datetime into the data structure with an all-zero flag
ds_aws_60minute.series["DateTime"] = {}
ds_aws_60minute.series["DateTime"]["Data"] = dt_aws_60minute
ds_aws_60minute.series["DateTime"]["Flag"] = numpy.zeros(nRecs_60minute, dtype=numpy.int32)
ds_aws_60minute.series["DateTime"]["Attr"] = qcutils.MakeAttributeDictionary(
    long_name="DateTime in local time zone", units="None")
# get the control file cf = qcio.load_controlfile(path='../controlfiles') if len(cf)==0: sys.exit() start_date = cf["General"]["start_date"] end_date = cf["General"]["end_date"] var_list = cf["Variables"].keys() site_list = cf["Sites"].keys() for site in site_list: # get the input file mask infilename = cf["Sites"][site]["in_filepath"]+cf["Sites"][site]["in_filename"] if not os.path.isfile(infilename): log.error("netCDF file "+infilename+" not found, skipping ...") continue log.info("Starting site: "+site) # get a data structure ds_30 = qcio.DataStructure() # get the output file name outfilename = cf["Sites"][site]["out_filepath"]+cf["Sites"][site]["out_filename"] # average to 30 minutes or not average = True if not cf["Sites"][site].as_bool("average"): average = False # get the site time zone site_timezone = cf["Sites"][site]["site_timezone"] # read the BIOS file bios_ncfile = netCDF4.Dataset(infilename) time = bios_ncfile.variables["time"][:] nRecs = len(time) # set some global attributes ts = ds_30.globalattributes["time_step"] = 30 ds_30.globalattributes["time_zone"] = site_timezone ds_30.globalattributes["nc_nrecs"] = nRecs