def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD file (gz or uncompressed), decodes the fixed-column records
     and returns the data in a data structure.
     The site altitude, latitude and longitude are read from the first line
     of the file and the site ID is built from the first two "-" separated
     parts of the file name.
     In addition to the variables decoded from the file (Wd, Ws, Ta, Td, ps
     and Precip), the data structure contains RH, Ah and q derived from Ta,
     Td and ps and the U and V wind components derived from Ws and Wd.
    Assumptions:
     The file name contains at least one "-" and the file contains at least
     one line, IndexError is raised otherwise.
     Fields that can not be converted to a number are set to the missing
     value of the variable and are masked in the returned data.
    Usage:
     ds = read_isd_file(isd_file_path)
     where isd_file_path is the path of the ISD file
           ds is a data structure
    Author: PRI
    Date: June 2017
    """
    def _decode_field(line, start, stop, scale, missing):
        """ Return line[start:stop] as a float divided by scale or, if the
            field is not a number, the missing value as a float."""
        try:
            return float(line[start:stop])/float(scale)
        except ValueError:
            # blank, truncated or non-numeric field
            return float(missing)

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file "+isd_file_name
    logger.info(msg)
    name_parts = isd_file_name.split("-")
    isd_site_id = name_parts[0]+"-"+name_parts[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
        # gzip returns bytes under Python 3 and the string comparisons below
        # would then never match; latin-1 maps one byte to one character so
        # the fixed column positions are preserved (no-op under Python 2)
        content = [line if isinstance(line, str) else line.decode("latin-1")
                   for line in content]
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude from the first line
    header = content[0]
    ds.globalattributes["altitude"] = float(header[46:51])
    ds.globalattributes["latitude"] = float(header[28:34])/float(1000)
    ds.globalattributes["longitude"] = float(header[34:41])/float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    isd = {}
    isd["DateTime"] = {"Data":[],"Flag":[],"Attr":{"long_name":"Datetime","units":"none"}}
    isd["Wd"] = {"Data":[],"Attr":{"long_name":"Wind direction","units":"degrees","missing_value":999}}
    isd["Ws"] = {"Data":[],"Attr":{"long_name":"Wind speed","units":"m/s","missing_value":999.9}}
    isd["Ta"] = {"Data":[],"Attr":{"long_name":"Air temperature","units":"C","missing_value":999.9}}
    isd["Td"] = {"Data":[],"Attr":{"long_name":"Dew point temperature","units":"C","missing_value":999.9}}
    isd["ps"] = {"Data":[],"Attr":{"long_name":"Surface pressure","units":"kPa","missing_value":9999.9}}
    isd["Precip"] = {"Data":[],"Attr":{"long_name":"Precipitation","units":"mm","missing_value":999.9}}
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ","CRN05","CRN15","FM-12","FM-15","FM-16","SY-MT"]
    # iterate over the lines in the file and decode the data
    # NOTE(review): the last line of the file is not processed, this is the
    #               behaviour of the original code - confirm it is intended
    for line in content[:-1]:
        # filter out anything other than hourly data
        if line[41:46] not in OK_obs_code:
            continue
        YY = int(line[15:19])
        MM = int(line[19:21])
        DD = int(line[21:23])
        HH = int(line[23:25])
        mm = int(line[25:27])
        dt = datetime.datetime(YY,MM,DD,HH,mm,0)
        isd["DateTime"]["Data"].append(dt)
        # wind direction, degT
        isd["Wd"]["Data"].append(_decode_field(line, 60, 63, 1, 999))
        # wind speed, m/s
        isd["Ws"]["Data"].append(_decode_field(line, 65, 69, 10, 999.9))
        # air temperature, C
        isd["Ta"]["Data"].append(_decode_field(line, 87, 92, 10, 999.9))
        # dew point temperature, C
        isd["Td"]["Data"].append(_decode_field(line, 93, 98, 10, 999.9))
        # sea level pressure, hPa
        isd["ps"]["Data"].append(_decode_field(line, 99, 104, 10, 9999.9))
        # precipitation, mm; only decoded when the "AA1" group is present
        if line[108:111] == "AA1":
            precip = _decode_field(line, 113, 117, 10, 999.9)
        else:
            precip = float(999.9)
        isd["Precip"]["Data"].append(precip)
    # add the time zone to the DateTime attributes
    isd["DateTime"]["Attr"]["time_zone"] = "UTC"
    # get the number of records and add this to the global attributes
    nrecs = len(isd["DateTime"]["Data"])
    ds.globalattributes["nc_nrecs"] = str(nrecs)
    # define the QC flags; f0 is used where data is present, f1 where masked
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # deal with the datetime first
    variable = {"Label":"DateTime", "Data":numpy.array(isd["DateTime"]["Data"]),
                "Flag":f0, "Attr":isd["DateTime"]["Attr"]}
    qcutils.CreateVariable(ds, variable)
    # get the nominal time step as the most common interval between records
    # NOTE(review): the division by 60 presumably converts seconds to
    #               minutes - confirm against qcutils.get_timestep
    dt_delta = qcutils.get_timestep(ds)
    ts = scipy.stats.mode(dt_delta)[0]/60
    ds.globalattributes["time_step"] = ts[0]
    # add the variables to the data structure
    logger.info("Writing data to the data structure")
    labels = [label for label in isd if label != "DateTime"]
    for label in labels:
        attr = isd[label]["Attr"]
        # mask the records that were set to the missing value when decoding
        data = numpy.ma.masked_equal(isd[label]["Data"], attr["missing_value"])
        flag = numpy.where(numpy.ma.getmaskarray(data), f1, f0)
        variable = {"Label":label, "Data":data, "Flag":flag, "Attr":attr}
        qcutils.CreateVariable(ds, variable)
    # hPa to kPa
    ps = qcutils.GetVariable(ds, "ps")
    ps["Data"] = ps["Data"]/float(10)
    # convert sea level pressure to station pressure
    # NOTE(review): the ps flag is not recalculated after this adjustment so
    #               records masked because Ta is masked keep their old flag
    site_altitude = float(ds.globalattributes["altitude"])
    Ta = qcutils.GetVariable(ds, "Ta")
    cfac = numpy.ma.exp((-1*site_altitude)/((Ta["Data"]+273.15)*29.263))
    ps["Data"] = ps["Data"]*cfac
    ps["Attr"]["long_name"] = ps["Attr"]["long_name"]+", adjusted from sea level to station"
    qcutils.CreateVariable(ds, ps)
    # do precipitation and apply crude limits
    Precip = qcutils.GetVariable(ds, "Precip")
    condition = (Precip["Data"]<0)|(Precip["Data"]>100)
    Precip["Data"] = numpy.ma.masked_where(condition, Precip["Data"])
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"]), f1, f0)
    Precip["Attr"]["RangeCheck_upper"] = 100
    Precip["Attr"]["RangeCheck_lower"] = 0
    qcutils.CreateVariable(ds, Precip)
    # get the humidities from Td
    Ta = qcutils.GetVariable(ds, "Ta")
    Td = qcutils.GetVariable(ds, "Td")
    ps = qcutils.GetVariable(ds, "ps")
    RH = mf.RHfromdewpoint(Td["Data"], Ta["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(RH), f1, f0)
    attr = {"long_name":"Relative humidity", "units":"%"}
    variable = {"Label":"RH", "Data":RH, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    Ah = mf.absolutehumidityfromRH(Ta["Data"], RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah), f1, f0)
    attr = {"long_name":"Absolute humidity", "units":"g/m3"}
    variable = {"Label":"Ah", "Data":Ah, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    q = mf.specifichumidityfromRH(RH, Ta["Data"], ps["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(q), f1, f0)
    attr = {"long_name":"Specific humidity", "units":"kg/kg"}
    variable = {"Label":"q", "Data":q, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    # get U and V components from wind speed and direction
    Ws = qcutils.GetVariable(ds, "Ws")
    Wd = qcutils.GetVariable(ds, "Wd")
    U, V = qcutils.convert_WSWDtoUV(Ws, Wd)
    qcutils.CreateVariable(ds, U)
    qcutils.CreateVariable(ds, V)
    # add the time variable
    qcutils.get_nctime_from_datetime(ds)
    # return the data
    return ds
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD file (gz or uncompressed), decodes the fixed-column records
     and returns the data in a data structure.
     The site altitude, latitude and longitude are read from the first line
     of the file and the site ID is built from the first two "-" separated
     parts of the file name.
     In addition to the series decoded from the file (Wd, Ws, Ta, Td, ps and
     Precip), the data structure contains RH, Ah and q derived from Ta, Td
     and ps.
    Assumptions:
     The file name contains at least one "-" and the file contains at least
     one line, IndexError is raised otherwise.
     Fields that can not be converted to a number are set to the missing
     value of the series and are masked in the returned data.
    Usage:
     ds = read_isd_file(isd_file_path)
     where isd_file_path is the path of the ISD file
           ds is a data structure
    Author: PRI
    Date: June 2017
    """
    def _decode_field(line, start, stop, scale, missing):
        """ Return line[start:stop] as a float divided by scale or, if the
            field is not a number, the missing value as a float."""
        try:
            return float(line[start:stop]) / float(scale)
        except ValueError:
            # blank, truncated or non-numeric field
            return float(missing)

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    name_parts = isd_file_name.split("-")
    isd_site_id = name_parts[0] + "-" + name_parts[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
        # gzip returns bytes under Python 3 and the string comparisons below
        # would then never match; latin-1 maps one byte to one character so
        # the fixed column positions are preserved (no-op under Python 2)
        content = [line if isinstance(line, str) else line.decode("latin-1")
                   for line in content]
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude from the first line
    header = content[0]
    ds.globalattributes["altitude"] = float(header[46:51])
    ds.globalattributes["latitude"] = float(header[28:34]) / float(1000)
    ds.globalattributes["longitude"] = float(header[34:41]) / float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    series_info = (("DateTime", "Datetime", "none"),
                   ("Wd", "Wind direction", "degrees"),
                   ("Ws", "Wind speed", "m/s"),
                   ("Ta", "Air temperature", "C"),
                   ("Td", "Dew point temperature", "C"),
                   ("ps", "Surface pressure", "kPa"),
                   ("Precip", "Precipitation", "mm"))
    for label, long_name, units in series_info:
        ds.series[label] = {
            "Data": [],
            "Flag": [],
            "Attr": {"long_name": long_name, "units": units}
        }
    # define the codes for good data in the ISD file
    OK_obs_code = [
        "AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"
    ]
    # iterate over the lines in the file and decode the data
    # NOTE(review): the last line of the file is not processed, this is the
    #               behaviour of the original code - confirm it is intended
    for line in content[:-1]:
        # filter out anything other than hourly data
        if line[41:46] not in OK_obs_code:
            continue
        YY = int(line[15:19])
        MM = int(line[19:21])
        DD = int(line[21:23])
        HH = int(line[23:25])
        mm = int(line[25:27])
        dt = datetime.datetime(YY, MM, DD, HH, mm, 0)
        ds.series["DateTime"]["Data"].append(pytz.utc.localize(dt))
        # wind direction, degT
        ds.series["Wd"]["Data"].append(_decode_field(line, 60, 63, 1, 999))
        # wind speed, m/s
        ds.series["Ws"]["Data"].append(_decode_field(line, 65, 69, 10, 999.9))
        # air temperature, C
        ds.series["Ta"]["Data"].append(_decode_field(line, 87, 92, 10, 999.9))
        # dew point temperature, C
        ds.series["Td"]["Data"].append(_decode_field(line, 93, 98, 10, 999.9))
        # sea level pressure, hPa
        ds.series["ps"]["Data"].append(_decode_field(line, 99, 104, 10, 9999.9))
        # precipitation, mm; only decoded when the "AA1" group is present
        if line[108:111] == "AA1":
            precip = _decode_field(line, 113, 117, 10, 999.9)
        else:
            precip = float(999.9)
        ds.series["Precip"]["Data"].append(precip)
    # add the time zone to the DateTime attributes
    ds.series["DateTime"]["Attr"]["time_zone"] = "UTC"
    # convert from lists to masked arrays
    # f0 is used as the flag where data is present, f1 where it is masked
    nrecs = len(ds.series["DateTime"]["Data"])
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    ds.series["DateTime"]["Data"] = numpy.array(ds.series["DateTime"]["Data"])
    ds.series["DateTime"]["Flag"] = f0
    ds.globalattributes["nc_nrecs"] = len(f0)
    # get the nominal time step as the most common interval between records
    # NOTE(review): the division by 60 presumably converts seconds to
    #               minutes - confirm against qcutils.get_timestep
    dt_delta = qcutils.get_timestep(ds)
    ts = scipy.stats.mode(dt_delta)[0] / 60
    ds.globalattributes["time_step"] = ts[0]
    # mask the records that were set to the missing value when decoding
    for label, missing in (("Wd", 999), ("Ws", 999.9), ("Ta", 999.9), ("Td", 999.9)):
        series = ds.series[label]
        series["Data"] = numpy.ma.masked_equal(series["Data"], missing)
        series["Flag"] = numpy.where(numpy.ma.getmaskarray(series["Data"]), f1, f0)
    # hPa to kPa
    ds.series["ps"]["Data"] = numpy.ma.masked_equal(ds.series["ps"]["Data"],
                                                    9999.9) / float(10)
    ds.series["ps"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["ps"]["Data"]), f1, f0)
    # convert sea level pressure to station pressure
    # NOTE(review): the ps flag is not recalculated after this adjustment so
    #               records masked because Ta is masked keep their old flag
    site_altitude = float(ds.globalattributes["altitude"])
    cfac = numpy.ma.exp(
        (-1 * site_altitude) / ((ds.series["Ta"]["Data"] + 273.15) * 29.263))
    ds.series["ps"]["Data"] = ds.series["ps"]["Data"] * cfac
    # do precipitation and apply crude limits
    precip_data = numpy.ma.masked_equal(ds.series["Precip"]["Data"], 999.9)
    condition = (precip_data < 0) | (precip_data > 100)
    ds.series["Precip"]["Data"] = numpy.ma.masked_where(condition, precip_data)
    ds.series["Precip"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Precip"]["Data"]), f1, f0)
    # get the humidities from Td
    Ta, flag, attr = qcutils.GetSeriesasMA(ds, "Ta")
    Td, flag, attr = qcutils.GetSeriesasMA(ds, "Td")
    ps, flag, attr = qcutils.GetSeriesasMA(ds, "ps")
    RH = mf.RHfromdewpoint(Td, Ta)
    flag = numpy.where(numpy.ma.getmaskarray(RH), f1, f0)
    attr = {"long_name": "Relative humidity", "units": "%"}
    qcutils.CreateSeries(ds, "RH", RH, Flag=flag, Attr=attr)
    Ah = mf.absolutehumidityfromRH(Ta, RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah), f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m3"}
    qcutils.CreateSeries(ds, "Ah", Ah, Flag=flag, Attr=attr)
    q = mf.specifichumidityfromRH(RH, Ta, ps)
    flag = numpy.where(numpy.ma.getmaskarray(q), f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    qcutils.CreateSeries(ds, "q", q, Flag=flag, Attr=attr)
    # return the data
    return ds
VPD_erai_tts, Flag=flag, Attr=attr) RH_erai_tts = float(100) * e_erai_tts / es_erai_tts flag = numpy.zeros(len(RH_erai_tts), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary(long_name="Relative humidity", units="percent") qcutils.CreateSeries(ds_erai, "RH", RH_erai_tts, Flag=flag, Attr=attr) # get the absolute humidity Ah_erai_tts = mf.absolutehumidityfromRH(Ta_erai_tts, RH_erai_tts) flag = numpy.zeros(len(Ah_erai_tts), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary(long_name="Absolute humidity", units="g/m3") qcutils.CreateSeries(ds_erai, "Ah", Ah_erai_tts, Flag=flag, Attr=attr) # get the specific humidity q_erai_tts = mf.specifichumidityfromRH(RH_erai_tts, Ta_erai_tts, ps_erai_tts) flag = numpy.zeros(len(q_erai_tts), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary(long_name="Specific humidity", units="kg/kg") qcutils.CreateSeries(ds_erai, "q", q_erai_tts, Flag=flag, Attr=attr) # Interpolate the 3 hourly boundary layer height to the tower time step # NOTE: ERA-I variables are dimensioned [time,latitude,longitude] Habl_3d = erai_file.variables["blh"][:, :, :] Habl_erai_3hr = Habl_3d[:, site_lat_index, site_lon_index] # get the spline interpolation function s = InterpolatedUnivariateSpline(erai_time_3hr, Habl_erai_3hr, k=1) # get the boundary layer height at the tower time step Habl_erai_tts = s(erai_time_tts) flag = numpy.zeros(len(Habl_erai_tts), dtype=numpy.int32) attr = qcutils.MakeAttributeDictionary(
accum_attr["units"] = "mm/30 minutes" # put the precipitation per time step back into the data struicture qcutils.CreateSeries(ds_all,output_label,precip,Flag=accum_flag,Attr=accum_attr) # calculate missing humidities RH_list = sorted([x for x in ds_all.series.keys() if ("RH" in x) and ("_QCFlag" not in x)]) Ta_list = sorted([x for x in ds_all.series.keys() if ("Ta" in x) and ("_QCFlag" not in x)]) ps_list = sorted([x for x in ds_all.series.keys() if ("ps" in x) and ("_QCFlag" not in x)]) for RH_label,Ta_label,ps_label in zip(RH_list,Ta_list,ps_list): Ta,f,a = qcutils.GetSeriesasMA(ds_all,Ta_label) RH,f,a = qcutils.GetSeriesasMA(ds_all,RH_label) ps,f,a = qcutils.GetSeriesasMA(ds_all,ps_label) Ah = mf.absolutehumidityfromRH(Ta, RH) attr = qcutils.MakeAttributeDictionary(long_name='Absolute humidity',units='g/m3',standard_name='not defined', bom_id=a["bom_id"],bom_name=a["bom_name"],bom_dist=a["bom_dist"]) qcutils.CreateSeries(ds_all,RH_label.replace("RH","Ah"),Ah,Flag=f,Attr=attr) q = mf.specifichumidityfromRH(RH, Ta, ps) attr = qcutils.MakeAttributeDictionary(long_name='Specific humidity',units='kg/kg',standard_name='not defined', bom_id=a["bom_id"],bom_name=a["bom_name"],bom_dist=a["bom_dist"]) qcutils.CreateSeries(ds_all,RH_label.replace("RH","q"),q,Flag=f,Attr=attr) # now write the data structure to file # OMG, the user may want to overwrite the old data ... if os.path.exists(ncname): # ... but we will save them from themselves! t = time.localtime() rundatetime = datetime.datetime(t[0],t[1],t[2],t[3],t[4],t[5]).strftime("%Y%m%d%H%M") new_ext = "_"+rundatetime+".nc" # add the current local datetime the old file name newFileName = ncname.replace(".nc",new_ext) msg = " Renaming "+ncname+" to "+newFileName log.info(msg)