def read_COSMIC_TPbased_mean(E, hostname='taurus'):
    """
    Read in COSMIC temp and N2 data averaged with respect to the local tropopause.

    INPUTS:
    E: a dictionary whose entry 'variable' selects the field to load:
        'T' reads the file variable 'T', 'Nsq' reads the file variable 'N2'.
        Any other value raises a KeyError.
    hostname: passed on to es.obs_data_paths to locate the COSMIC data.
        Default is taurus.

    OUTPUT:
    a dictionary with the entries
        z: the contents of the file variable 'z'
        data: the requested variable, converted to Kelvin if the units
            attribute in the file mentions Celsius
        units: the units attribute of the requested variable, or 'K' if the
            data were converted
    """

    from netCDF4 import Dataset

    # find the path to the data
    datadir = es.obs_data_paths('COSMIC', hostname)

    # TODO: right now this loads the only file I have. Later can code a dynamic way to
    # choose files based on what is specified in E.
    # NOTE(review): the file name is appended without a separator, so datadir
    # presumably ends with a slash -- confirm in es.obs_data_paths
    ff = 'mean_GPS-RO_45-60N_COSMIC_Jan2010_TPbased_mean.nc'
    filename = datadir + ff

    # open the file and read in the relevant data into dict
    D = dict()
    f = Dataset(filename, 'r')
    try:
        # map the variable names used in E to the names used in the file
        varnames = {'T': 'T', 'Nsq': 'N2'}
        variable = varnames[E['variable']]

        D['z'] = f.variables['z'][:]

        # read the requested field from disk only once
        ncvar = f.variables[variable]
        D['data'] = ncvar[:]
        D['units'] = ncvar.units

        # one last thing! Convert Celsius to Kelvin
        # (0 degrees Celsius is 273.15 K)
        if 'Celsius' in D['units']:
            D['data'] = D['data'] + 273.15
            D['units'] = 'K'
    finally:
        # release the file handle even if one of the lookups above fails
        f.close()

    return D
def HRRS_station_data(hostname='taurus'):
    """
    Read in information about the high-res radiosondes and return it as a pandas dataframe.

    INPUTS:
    hostname: passed on to es.obs_data_paths to locate the HRRS data.
        Default is taurus.

    OUTPUT:
    a pandas dataframe read from 'ListOfStations.dat', indexed by the column
    'WBAN', with the columns Station_Name, State, Country, WMO_Code, Lat, Lon,
    Height and 'Transition date'. Lat and Lon are coerced to numeric; entries
    that cannot be parsed become NaN.
    """

    datadir = es.obs_data_paths('HRRS', hostname)

    # NOTE(review): the file name is appended without a separator, so datadir
    # presumably ends with a slash -- confirm in es.obs_data_paths
    ff = datadir + 'ListOfStations.dat'

    colnames = [
        'WBAN', 'Station_Name', 'State', 'Country', 'WMO_Code',
        'Lat', 'Lon', 'Height', 'Transition date',
    ]

    # the first line of the file is skipped and replaced by the names above
    read_args = dict(delimiter=",", skiprows=1, names=colnames, index_col='WBAN')

    # Malformed lines are dropped with a warning instead of aborting the read.
    # pandas 1.3 renamed the switch for this from error_bad_lines to on_bad_lines
    # and pandas 2.0 removed the old keyword, so try the current spelling first
    # and fall back to the legacy one if this pandas does not know it.
    try:
        stations = pd.read_csv(ff, on_bad_lines='warn', **read_args)
    except TypeError:
        stations = pd.read_csv(ff, error_bad_lines=False, **read_args)

    # a few columns have to be coerced to numeric
    stations[['Lat', 'Lon']] = stations[['Lat', 'Lon']].apply(pd.to_numeric, errors='coerce')

    return stations
def HRRS_station_data(hostname='taurus'):
    """
    Read in information about the high-res radiosondes and return it as a pandas dataframe.

    INPUTS:
    hostname: passed on to es.obs_data_paths to locate the HRRS data.
        Default is taurus.

    OUTPUT:
    a pandas dataframe read from 'ListOfStations.dat', indexed by the column
    'WBAN', with the columns Station_Name, State, Country, WMO_Code, Lat, Lon,
    Height and 'Transition date'. Lat and Lon are coerced to numeric; entries
    that cannot be parsed become NaN.
    """

    datadir = es.obs_data_paths('HRRS', hostname)

    # NOTE(review): the file name is appended without a separator, so datadir
    # presumably ends with a slash -- confirm in es.obs_data_paths
    ff = datadir + 'ListOfStations.dat'

    colnames = [
        'WBAN', 'Station_Name', 'State', 'Country', 'WMO_Code',
        'Lat', 'Lon', 'Height', 'Transition date',
    ]

    # the first line of the file is skipped and replaced by the names above
    read_args = dict(delimiter=",", skiprows=1, names=colnames, index_col='WBAN')

    # Malformed lines are dropped with a warning instead of aborting the read.
    # pandas 1.3 renamed the switch for this from error_bad_lines to on_bad_lines
    # and pandas 2.0 removed the old keyword, so try the current spelling first
    # and fall back to the legacy one if this pandas does not know it.
    try:
        stations = pd.read_csv(ff, on_bad_lines='warn', **read_args)
    except TypeError:
        stations = pd.read_csv(ff, error_bad_lines=False, **read_args)

    # a few columns have to be coerced to numeric
    stations[['Lat', 'Lon']] = stations[['Lat', 'Lon']].apply(pd.to_numeric, errors='coerce')

    return stations
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a csv file.

    INPUTS:
    DR: a list of datetime objects that give the desired date range. The first
        and last entries set the range of years that is searched and the dates
        that appear in the name of the output file.
    hostname: passed on to es.obs_data_paths and to TIL.ztrop. Default is taurus.
    debug: set to True to print the files that are read and the resulting
        series. Default is False.

    OUTPUT:
    a pandas series named 'ztrop_mean', indexed by station, holding the mean of
    the tropopause heights returned by TIL.ztrop for all soundings found in DR
    (over all years in the range). Stations without any usable sounding get NaN.
    The same series is written to
    <HRRS path>/mean_tropopause_height_per_station_<first date>-<last date>.csv
    """

    import calendar
    from TIL import ztrop

    # the dir where the station data live is the same for every year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights for all available obs, keyed by station.
    # This is filled over ALL years, so that a daterange spanning several years
    # is averaged as a whole instead of keeping only the last year.
    ztrop_lists = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # Leap years have 366 days -- with a fixed 365 the last day would be lost.
        days_in_year = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=days_in_year * 4, DT='6H')
        # sorted so that the files are visited in chronological order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:

            # list of tropopause heights for this station (shared across years)
            station_ztrops = ztrop_lists.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                D = read_HRRS_data(ff)
                if D is None:
                    # same guard as in HRRS_as_DF: nothing usable in this file
                    continue

                # compute tropopause height
                z = D['Alt'] / 1E3      # Altitude in km
                T = D['Temp'] + 273.15  # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    station_ztrops.append(ztropp)

    # average the tropopause heights per station.
    # Stations without any sounding get NaN (without numpy's empty-mean warning).
    ztrop_dict = dict()
    for s, heights in ztrop_lists.items():
        ztrop_dict[s] = np.mean(heights) if heights else np.nan

    # turn dict into a pandas series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')
    if debug:
        print(ZT)

    # store the series as a csv file
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime("%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return ZT
def HRRS_as_DF(OBS, TPbased=False, TPbased_vertical_res=50E-3, hostname='taurus', debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde data as a pandas data frame

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the
        DART/model specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause - default is False.
    hostname: default is taurus
    debug: set to True to print some stuff out. Default is False.
    TPbased_vertical_res: resolution of the grid to which we interpolate the obs
        when doing TP-based coordinates. Default is 50m.

    OUTPUT:
    a pandas data frame that concatenates the soundings of all stations and dates
    that were found, in chronological order per station and year. Each sounding
    gets the extra columns 'Date' and 'StationNumber'; 'Alt' is in km and 'Temp'
    in Kelvin.

    Raises a ValueError if no sounding at all is found for the request.
    """

    import calendar

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live is the same for every year
    datadir = es.obs_data_paths('HRRS', hostname)

    # columns of the raw soundings that are not needed in the output
    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                    'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']

    lat_min, lat_max = OBS['latrange'][0], OBS['latrange'][1]
    lon_min, lon_max = OBS['lonrange'][0], OBS['lonrange'][1]

    # initialize an empty list which will hold the data frames for each station and time
    DFlist = []

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # trim list down to the ones that fit into the latitude and longitude
        # range (bounds are inclusive); look up each station's row only once
        stations_latlon = []
        for s in Slist:
            station = stationdata.loc[int(s)]
            if lat_min <= station['Lat'] <= lat_max and lon_min <= station['Lon'] <= lon_max:
                stations_latlon.append(s)

        # also compute the subset of the requested daterange that fits into this year.
        # Leap years have 366 days -- with a fixed 365 the last day would be lost.
        days_in_year = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=days_in_year * 4, DT='6H')
        # sorted so that the rows of the output come in a reproducible,
        # chronological order instead of arbitrary set order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff, vertical_res_km=TPbased_vertical_res)
                    alt_to_km = 1.0     # here the altitude is already in km
                    temp_to_K = 0.0     # ...and no offset is added to the temperature
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3  # raw data are in m -- convert to km
                    temp_to_K = 273.15  # raw data need to be converted to kelvin

                if D is None:
                    # the reader found nothing usable in this file
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat also raises a ValueError for an empty list, but its message
    # ("No objects to concatenate") does not tell the user what went wrong
    if not DFlist:
        raise ValueError('No HRRS data found for the requested dates and lat/lon range')

    # merge the list of data frames into a single DF
    DFout = pd.concat(DFlist, axis=0)

    return DFout
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a csv file.

    INPUTS:
    DR: a list of datetime objects that give the desired date range. The first
        and last entries set the range of years that is searched and the dates
        that appear in the name of the output file.
    hostname: passed on to es.obs_data_paths and to TIL.ztrop. Default is taurus.
    debug: set to True to print the files that are read and the resulting
        series. Default is False.

    OUTPUT:
    a pandas series named 'ztrop_mean', indexed by station, holding the mean of
    the tropopause heights returned by TIL.ztrop for all soundings found in DR
    (over all years in the range). Stations without any usable sounding get NaN.
    The same series is written to
    <HRRS path>/mean_tropopause_height_per_station_<first date>-<last date>.csv
    """

    import calendar
    from TIL import ztrop

    # the dir where the station data live is the same for every year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights for all available obs, keyed by station.
    # This is filled over ALL years, so that a daterange spanning several years
    # is averaged as a whole instead of keeping only the last year.
    ztrop_lists = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # Leap years have 366 days -- with a fixed 365 the last day would be lost.
        days_in_year = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=days_in_year * 4, DT='6H')
        # sorted so that the files are visited in chronological order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:

            # list of tropopause heights for this station (shared across years)
            station_ztrops = ztrop_lists.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                D = read_HRRS_data(ff)
                if D is None:
                    # same guard as in HRRS_as_DF: nothing usable in this file
                    continue

                # compute tropopause height
                z = D['Alt'] / 1E3      # Altitude in km
                T = D['Temp'] + 273.15  # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    station_ztrops.append(ztropp)

    # average the tropopause heights per station.
    # Stations without any sounding get NaN (without numpy's empty-mean warning).
    ztrop_dict = dict()
    for s, heights in ztrop_lists.items():
        ztrop_dict[s] = np.mean(heights) if heights else np.nan

    # turn dict into a pandas series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')
    if debug:
        print(ZT)

    # store the series as a csv file
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime("%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return ZT
def HRRS_as_DF(OBS, TPbased=False, TPbased_vertical_res=50E-3, hostname='taurus', debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde data as a pandas data frame

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the
        DART/model specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause - default is False.
    hostname: default is taurus
    debug: set to True to print some stuff out. Default is False.
    TPbased_vertical_res: resolution of the grid to which we interpolate the obs
        when doing TP-based coordinates. Default is 50m.

    OUTPUT:
    a pandas data frame that concatenates the soundings of all stations and dates
    that were found, in chronological order per station and year. Each sounding
    gets the extra columns 'Date' and 'StationNumber'; 'Alt' is in km and 'Temp'
    in Kelvin.

    Raises a ValueError if no sounding at all is found for the request.
    """

    import calendar

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live is the same for every year
    datadir = es.obs_data_paths('HRRS', hostname)

    # columns of the raw soundings that are not needed in the output
    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                    'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']

    lat_min, lat_max = OBS['latrange'][0], OBS['latrange'][1]
    lon_min, lon_max = OBS['lonrange'][0], OBS['lonrange'][1]

    # initialize an empty list which will hold the data frames for each station and time
    DFlist = []

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # trim list down to the ones that fit into the latitude and longitude
        # range (bounds are inclusive); look up each station's row only once
        stations_latlon = []
        for s in Slist:
            station = stationdata.loc[int(s)]
            if lat_min <= station['Lat'] <= lat_max and lon_min <= station['Lon'] <= lon_max:
                stations_latlon.append(s)

        # also compute the subset of the requested daterange that fits into this year.
        # Leap years have 366 days -- with a fixed 365 the last day would be lost.
        days_in_year = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=days_in_year * 4, DT='6H')
        # sorted so that the rows of the output come in a reproducible,
        # chronological order instead of arbitrary set order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff, vertical_res_km=TPbased_vertical_res)
                    alt_to_km = 1.0     # here the altitude is already in km
                    temp_to_K = 0.0     # ...and no offset is added to the temperature
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3  # raw data are in m -- convert to km
                    temp_to_K = 273.15  # raw data need to be converted to kelvin

                if D is None:
                    # the reader found nothing usable in this file
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat also raises a ValueError for an empty list, but its message
    # ("No objects to concatenate") does not tell the user what went wrong
    if not DFlist:
        raise ValueError('No HRRS data found for the requested dates and lat/lon range')

    # merge the list of data frames into a single DF
    DFout = pd.concat(DFlist, axis=0)

    return DFout