def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high-res radiosonde (HRRS)
    data, compute the average tropopause height per station, and store the
    result in a csv file.

    INPUTS:
    DR: daterange (list of datetime objects) over which to average
    hostname: machine name used to resolve the HRRS data paths (default 'taurus')
    debug: if True, print extra diagnostics (default False)

    Returns the pandas Series of per-station mean tropopause heights that was
    written to csv (indexed by station number).
    """
    from TIL import ztrop

    # the data path is loop-invariant, so resolve it once up front
    datadir = es.obs_data_paths('HRRS', hostname)

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[-1].year

    # dictionary of mean tropopause height per station. Initialized once,
    # OUTSIDE the year loop, so that multi-year dateranges accumulate all
    # stations instead of keeping only the final year's.
    ztrop_dict = dict()

    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # compute the subset of the requested daterange that fits into this year.
        # Derive the number of 6-hourly periods from the actual year length so
        # that leap years (366 days) are fully covered.
        ndays = (datetime.datetime(YYYY + 1, 1, 1) - datetime.datetime(YYYY, 1, 1)).days
        year_daterange = dart.daterange(
            date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
            periods=ndays * 4, DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one retrieve the data
        # that fit into the requested daterange
        for s in Slist:

            # tropopause heights for all available obs at this station
            ztrop_list = []

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if os.path.exists(ff):
                    if debug:
                        print(ff)

                    # read in the station data
                    D = read_HRRS_data(ff)

                    # compute tropopause height
                    z = D['Alt'] / 1E3        # altitude in km
                    T = D['Temp'] + 273.15    # temperature in Kelvin
                    ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                    # add to list if not None
                    if ztropp is not None:
                        ztrop_list.append(ztropp)

            # average the tropopause heights and add to the dictionary;
            # store NaN explicitly when no obs were found, which avoids the
            # RuntimeWarning that np.mean raises on an empty list
            ztrop_dict[s] = np.mean(ztrop_list) if ztrop_list else np.nan

    # turn dict into a pandas Series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')
    if debug:
        print(ZT)

    # store as a csv file, with the full daterange encoded in the filename
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[-1].strftime("%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return ZT
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a csv file.

    INPUTS:
    DR: daterange (list of datetime objects) over which to average
    hostname: machine name used to resolve the HRRS data paths (default 'taurus')
    debug: if True, print extra diagnostics (default False)

    Returns the pandas Series of per-station mean tropopause heights that was
    written to csv.
    """
    from TIL import ztrop

    # first read in station information as a dataframe
    # NOTE(review): stationdata is never used below — presumably kept for a
    # side effect or future use; confirm before removing.
    stationdata = HRRS_station_data(hostname)

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    years = range(y0, yf + 1, 1)
    for YYYY in years:

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # NOTE(review): periods=365*4 assumes a 365-day year, so the last day
        # of a leap year is not covered — confirm whether that is intended.
        year_daterange = dart.daterange(date_start=datetime.datetime(
            YYYY, 1, 1, 0, 0, 0), periods=365 * 4, DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # also find the dir where the station data live
        datadir = es.obs_data_paths('HRRS', hostname)

        # initialize empty dictionary to hold average tropopause heights per station
        # NOTE(review): this is reset on every year iteration, so for a
        # multi-year daterange only the final year's stations survive into
        # the Series built after the loop — verify against callers.
        ztrop_dict = dict()

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:

            # empty list to hold tropopause heights for all available obs per station
            ztrop_list = []

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(
                    s) + '-' + datestr + '_mod.dat'
                if os.path.exists(ff):
                    if debug:
                        print(ff)

                    # read in the station data
                    D = read_HRRS_data(ff)

                    # compute tropopause height
                    z = D['Alt'] / 1E3  # Altitude in km
                    T = D['Temp'] + 273.15  # Temp in Kelvin
                    ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                    # add to list if not none
                    if ztropp is not None:
                        ztrop_list.append(ztropp)

            # average the tropopause heights and add to dictionary
            # (np.mean of an empty list yields NaN with a RuntimeWarning)
            ztrop_dict[s] = np.mean(ztrop_list)

    # turn dict into data frame
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')
    if debug:
        print(ZT)

    # turn dataframe into csv file
    hrrs_path = es.obs_data_paths('HRRS', hostname)
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime(
        "%Y%m%d") + '.csv'
    fname = hrrs_path + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)
    return (ZT)
def TP_based_HRRS_data(ff, vertical_res_km=50E-3, debug=False, hostname='taurus',
                       mean_ztrop_file='/data/c1/lneef/HRRS//mean_tropopause_height_per_station_20100101-20100131.csv'):
    """
    Given a single high-res radiosonde sounding (identified by its full file
    path, ff), load the data from the sounding and compute the temperature
    data as a function of distance from the thermal tropopause. This is done by:
            1. reading in the data as a pandas data frame
            2. computing the height of the tropopause
            3. computing the altitude of each data point relative to the tropopause
            4. linearly interpolating onto an evenly-spaced vertical grid
               (default spacing 50 m)
    This procedure is based on Birner et al. 2002
    (http://doi.wiley.com/10.1029/2002GL015142).
    Here the LR tropopause follows the WMO criterion. Quoting Birner et al.
    (2002): The thermal TP is defined as the lowest level where the
    temperature lapse rate falls below 2 K/km and its average between this
    level and all higher levels within 2 km remains below this value
    [WMO, 1957].

    INPUTS:
    ff: the full path to the HRRS profile that we will load
    vertical_res_km: vertical resolution of the grid to which we interpolate,
            in km. The default is 50 m, which is roughly the vertical
            resolution of the HRRS obs.
    debug: print extra diagnostics if True
    hostname: machine name, passed through to the tropopause routine
    mean_ztrop_file: csv file of mean tropopause height per station
            (as produced by HRRS_mean_ztrop_to_csv). The default preserves
            the historical hard-coded Jan 2010 file; pass a different file
            for other periods/hosts.

    Returns a DataFrame with columns 'Press', 'Temp', 'Alt', 'N2', 'ztropp'
    on the tropopause-based grid, or None when no clear lapse-rate tropopause
    is found.
    """
    from scipy.interpolate import interp1d
    from TIL import ztrop, Nsq

    # read in the sounding and keep only the columns we need
    DF0 = read_HRRS_data(ff)
    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                    'Lat', 'Lon', 'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh',
                    'Qu', 'Qv', 'QdZ']
    DF0.drop(useless_cols, inplace=True, axis=1)
    DF = DF0.dropna()  # get rid of NaNs

    if debug:
        print('Loading file ' + ff)

    # compute the height of the lapse-rate tropopause from altitude and temperature
    z = DF['Alt'] / 1E3        # altitude in km
    T = DF['Temp'] + 273.15    # temperature in Kelvin
    P = DF['Press']
    ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

    # bail out early when no clear tropopause exists for this sounding
    if ztropp is None:
        if debug:
            print('No clear lapse-rate tropopause found for the following sounding:')
            print(ff)
            print('Returning None')
        return None

    # the station number is the name of the directory holding the file
    station = ff.split('/')[-2]

    # retrieve the mean tropopause height for this station from the csv
    ZT = pd.read_csv(mean_ztrop_file, index_col=0)
    ztrop_mean = ZT.loc[int(station)].ztrop_mean

    # altitude relative to the tropopause, plus the mean tropopause height
    zTP = DF['Alt'] * 1E-3 - ztropp + ztrop_mean

    # interpolate temperature and pressure to a regularly spaced grid (in km),
    # keeping only the part of the grid sampled by this sounding
    fT = interp1d(zTP, T, kind='linear')
    fP = interp1d(zTP, P, kind='linear')
    zTPgrid = np.arange(0.0, 26.0, vertical_res_km)
    select = np.where(np.logical_and(zTPgrid > min(zTP), zTPgrid < max(zTP)))
    zTPnew = zTPgrid[select]
    Tnew = fT(zTPnew)
    Pnew = fP(zTPnew)

    # N2 comes out quite noisy when computed from raw radiosonde observations.
    # The interpolation smooths the temperature field a bit, which yields a
    # smoother N2 profile — so recompute N2 here from the gridded variables.
    N2new = Nsq(Tnew, zTPnew, Pnew)

    # assemble the output on the tropopause-based vertical coordinate
    return pd.DataFrame(data={'Press': Pnew, 'Temp': Tnew, 'Alt': zTPnew,
                              'N2': N2new, 'ztropp': ztropp})
def TP_based_HRRS_data(ff, vertical_res_km=50E-3, debug=False, hostname='taurus'):
    """
    Load a single high-resolution radiosonde sounding (full file path ff) and
    re-express its temperature data as a function of distance from the thermal
    tropopause, following Birner et al. 2002
    (http://doi.wiley.com/10.1029/2002GL015142):
            1. read the sounding into a pandas DataFrame
            2. compute the lapse-rate tropopause height
            3. compute each data point's altitude relative to the tropopause
            4. interpolate onto an evenly spaced vertical grid (50 m default)
    The lapse-rate tropopause follows the WMO criterion: the lowest level
    where the temperature lapse rate falls below 2 K/km and its average
    between that level and all higher levels within 2 km remains below this
    value [WMO, 1957].

    INPUTS:
    ff: full path to the HRRS profile to load
    vertical_res_km: spacing (km) of the target grid; default is 50 m, which
            is roughly the native vertical resolution of the HRRS obs
    debug: print extra diagnostics if True
    hostname: machine name forwarded to the tropopause routine

    Returns a DataFrame with columns Press/Temp/Alt/N2/ztropp on the new
    grid, or None when no clear lapse-rate tropopause is found.
    """
    from scipy.interpolate import interp1d
    from TIL import ztrop

    # load the sounding, discard the columns we never use, and drop NaN rows
    raw = read_HRRS_data(ff)
    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                    'Lat', 'Lon', 'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh',
                    'Qu', 'Qv', 'QdZ']
    raw.drop(useless_cols, inplace=True, axis=1)
    DF = raw.dropna()
    if debug:
        print('Loading file ' + ff)

    # convenient views of the columns, converted to km / Kelvin where needed
    z = DF['Alt'] / 1E3
    T = DF['Temp'] + 273.15
    P = DF['Press']
    N2 = DF['N2']  # raw N2; a smoother version is recomputed below

    # lapse-rate tropopause height for this sounding
    ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

    # bail out early when no clear tropopause was identified
    if ztropp is None:
        if debug:
            print('No clear lapse-rate tropopause found for the following sounding:')
            print(ff)
            print('Returning None')
        return None

    # the station number is the directory immediately above the file
    path_parts = ff.split('/')
    station = path_parts[len(path_parts) - 2]

    # mean tropopause height for this station
    # TODO: right now this reads in a csv file of mean heights for Jan 2010.
    # ...need to make this more dynamic and obviously not user and host specific
    ZT = pd.read_csv(
        '/data/c1/lneef/HRRS//mean_tropopause_height_per_station_20100101-20100131.csv',
        index_col=0)
    ztrop_mean = ZT.loc[int(station)].ztrop_mean

    # altitude relative to the tropopause, shifted by the mean tropopause height
    zTP = DF['Alt'] * 1E-3 - ztropp + ztrop_mean

    # regular grid (km), restricted to the range this sounding actually samples
    zTPgrid = np.arange(0.0, 26.0, vertical_res_km)
    inside = np.where(np.logical_and(zTPgrid > min(zTP), zTPgrid < max(zTP)))
    zTPnew = zTPgrid[inside]

    # linearly interpolate temperature and pressure onto the new grid
    Tnew = interp1d(zTP, T, kind='linear')(zTPnew)
    Pnew = interp1d(zTP, P, kind='linear')(zTPnew)

    # N2 from raw radiosonde data is noisy; the interpolation smooths the
    # temperature field a bit, so recompute N2 from the gridded variables
    from TIL import Nsq
    N2new = Nsq(Tnew, zTPnew, Pnew)

    # assemble the output on the tropopause-based vertical coordinate
    out = pd.DataFrame(data={'Press': Pnew, 'Temp': Tnew, 'Alt': zTPnew,
                             'N2': N2new, 'ztropp': ztropp})
    return out