def get_available_date_range(exp_name):
    """
    Given some existing DART experiment, return the daterange of all
    currently available data.

    INPUTS:
    exp_name: name of the experiment. Known names are 'W0910_GLOBAL'
        and 'W0910_NODA'.

    RETURNS:
    the result of dart.daterange for the stored start date, number of
    periods and time step of the requested experiment.

    RAISES:
    KeyError if exp_name is not one of the known experiments.
    """
    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # (start date, number of periods, time step) for each experiment.
    # Only the requested entry is turned into a daterange below.
    available = {
        'W0910_GLOBAL': (datetime.datetime(2009, 10, 1, 0, 0, 0), 380, '6H'),
        'W0910_NODA': (datetime.datetime(2009, 10, 1, 0, 0, 0), 640, '6H'),
    }

    try:
        date_start, periods, DT = available[exp_name]
    except KeyError:
        # same exception type as a plain dict lookup, but with a usable message
        raise KeyError('get_available_date_range: unknown experiment '
                       + repr(exp_name) + ' (known experiments: '
                       + ', '.join(sorted(available)) + ')')

    return dart.daterange(date_start=date_start, periods=periods, DT=DT)
def get_available_date_range(exp_name):
    """
    Given some existing DART experiment, return the daterange of all
    currently available data.

    INPUTS:
    exp_name: name of the experiment. Known names are 'W0910_GLOBAL'
        and 'W0910_NODA'.

    RETURNS:
    the result of dart.daterange for the stored start date, number of
    periods and time step of the requested experiment.

    RAISES:
    KeyError if exp_name is not one of the known experiments.
    """
    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # (start date, number of periods, time step) for each experiment.
    # Only the requested entry is turned into a daterange below.
    available = {
        'W0910_GLOBAL': (datetime.datetime(2009, 10, 1, 0, 0, 0), 380, '6H'),
        'W0910_NODA': (datetime.datetime(2009, 10, 1, 0, 0, 0), 640, '6H'),
    }

    try:
        date_start, periods, DT = available[exp_name]
    except KeyError:
        # same exception type as a plain dict lookup, but with a usable message
        raise KeyError('get_available_date_range: unknown experiment '
                       + repr(exp_name) + ' (known experiments: '
                       + ', '.join(sorted(available)) + ')')

    return dart.daterange(date_start=date_start, periods=periods, DT=DT)
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: the requested daterange -- an indexable sequence of datetime-like
        objects (DR[0] and DR[len(DR)-1] give the first and last year).
    hostname: passed on to es.obs_data_paths and ztrop. Default is taurus.
    debug: set to True to print the files that are read and the resulting
        series. Default is False.

    RETURNS:
    a pandas Series named 'ztrop_mean', indexed by station, holding the mean
    of all tropopause heights found for that station over the whole
    daterange. Stations without any usable sounding get NaN.

    SIDE EFFECTS:
    writes the series to
    <HRRS data dir>/mean_tropopause_height_per_station_<first>-<last>.csv
    """
    import calendar
    from TIL import ztrop

    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights per station, accumulated over ALL years of the
    # daterange (resetting this per year would discard every year but the last)
    ztrop_per_station = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # compute the subset of the requested daterange that fits into this
        # year; leap years have 366 days, otherwise Dec 31 would be dropped
        n_days = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=n_days * 4, DT='6H')
        # sorted so that files are visited in a reproducible order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:

            # list of tropopause heights for this station (shared across years)
            ztrop_list = ztrop_per_station.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data; HRRS_as_DF treats a None result
                # from this reader as "no data", so do the same here
                D = read_HRRS_data(ff)
                if D is None:
                    continue

                # compute tropopause height
                z = D['Alt'] / 1E3      # Altitude in km
                T = D['Temp'] + 273.15  # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    ztrop_list.append(ztropp)

    # average the tropopause heights per station; an empty list gives NaN
    # explicitly instead of relying on np.mean([]) and its runtime warning
    ztrop_dict = dict()
    for s, heights in ztrop_per_station.items():
        ztrop_dict[s] = np.mean(heights) if heights else np.nan

    # turn dict into a pandas series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')

    if debug:
        print(ZT)

    # store the series as a csv file next to the HRRS data
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime("%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return ZT
def HRRS_as_DF(OBS, TPbased=False, TPbased_vertical_res=50E-3, hostname='taurus', debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and
    return the available high-resolution radiosonde data as a pandas data frame.

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but
        the DART/model specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced
        altitudes relative to the tropopause - default is False.
    TPbased_vertical_res: resolution of the grid to which we interpolate the
        obs when doing TP-based coordinates. Default is 50m (given in km).
    hostname: default is taurus
    debug: set to True to print the files that are read. Default is False.

    RETURNS:
    a single data frame holding all soundings found, with the added columns
    'Date' and 'StationNumber', altitude in km and temperature in Kelvin.

    RAISES:
    ValueError if no sounding at all was found for the request.
    """
    import calendar

    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # bounds of the requested region
    lat_min, lat_max = OBS['latrange'][0], OBS['latrange'][1]
    lon_min, lon_max = OBS['lonrange'][0], OBS['lonrange'][1]

    # list which will hold the data frames for each station and time
    DFlist = []

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # trim list down to the stations inside the lat/lon box
        # (one station lookup per station instead of four)
        stations_latlon = []
        for s in Slist:
            station = stationdata.loc[int(s)]
            if lat_min <= station['Lat'] <= lat_max and lon_min <= station['Lon'] <= lon_max:
                stations_latlon.append(s)

        # compute the subset of the requested daterange that fits into this
        # year; leap years have 366 days, otherwise Dec 31 would be dropped
        n_days = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=n_days * 4, DT='6H')
        # sorted so that the rows of the output come in a reproducible order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff, vertical_res_km=TPbased_vertical_res)
                    alt_to_km = 1.0     # here the altitude is already in km
                    temp_to_K = 0.0
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3  # raw data are in m -- convert to km
                    temp_to_K = 273.15  # raw data need to be converted to kelvin

                if D is None:
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                                    'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat raises an uninformative ValueError on an empty list, so
    # raise the same exception type with a message that names the cause
    if not DFlist:
        raise ValueError('HRRS_as_DF: no HRRS data found for the requested '
                         'daterange and lat/lon range')

    # merge the list of data frames into a single DF
    DFout = pd.concat(DFlist, axis=0)

    return DFout
def get_experiment_date_ranges(exp_name):
    """
    Return the stored date range for one of the known DART experiments.

    INPUTS:
    exp_name: name of the experiment (see the table below for known names).

    RETURNS:
    the result of dart.daterange for the stored start date, number of
    periods and time step of the experiment, or None (after printing a
    message) if the experiment name is not known.
    """
    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # stored date ranges for various DART experiments:
    # name -> ((year, month, day, hour) of the start date, periods, time step)
    stored = {
        # CAM experiments for ERP assimilation study
        'NODA': ((2009, 1, 1, 0), 31, '1D'),
        'ERPALL': ((2009, 1, 1, 0), 31, '1D'),
        'RST': ((2009, 1, 1, 0), 17, '1D'),
        'ERPRST': ((2009, 1, 1, 0), 17, '1D'),
        # DART-WACCM runs performed at GEOMAR
        'PMO32': ((2009, 10, 1, 6), 31, '6H'),
        'W0910_NODA': ((2009, 10, 1, 12), 596, '6H'),
        'W0910_GLOBAL': ((2009, 10, 1, 12), 596, '6H'),
        'W0910_TROPICS': ((2009, 10, 1, 12), 596, '6H'),
        'W0910_NODART': ((2009, 10, 1, 12), 10, '6H'),
        'W0910_NOSTOP': ((2009, 10, 1, 12), 64, '6H'),
        # WACCM PMO runs performed by Nick Pedatella at NCAR
        'NCAR_PMO_CONTROL': ((2008, 11, 6, 6), 72, '6H'),
        'NCAR_PMO_LAS': ((2008, 11, 6, 6), 72, '6H'),
        'NCAR_PMO_LA': ((2008, 11, 6, 6), 72, '6H'),
        # WACCM real-obs runs performed by Nick Pedatella at NCAR
        'NCAR_FULL': ((2009, 1, 1, 6), 204, '6H'),
        'NCAR_LAONLY': ((2009, 1, 1, 6), 204, '6H'),
    }

    entry = stored.get(exp_name)
    if entry is None:
        # the message names this function (it used to say 'find_paths')
        print('get_experiment_date_ranges: cannot find experiment '
              + str(exp_name) + ', returning None')
        return None

    (year, month, day, hour), periods, DT = entry
    return dart.daterange(date_start=datetime.datetime(year, month, day, hour, 0, 0),
                          periods=periods, DT=DT)
def ano(E, climatology_option='NODA', hostname='taurus', verbose=False):
    """
    Compute anomaly fields relative to some climatology.

    INPUTS:
    E: a DART experiment dictionary. The entries 'daterange' and 'variable'
        are read here; the dictionary is also passed on to load_climatology
        and DSS.compute_DART_diagn_from_model_h_files.
    climatology_option: allowed values are
        'NODA': take the ensemble mean of the corresponding no-DA experiment
            as a 40-year climatology
        'F_W4_L66': daily climatology of a CESM+WACCM simulation with
            realistic forcings, 1951-2010
        None: don't subtract out anything -- just return the regular fields
            in the same shape as other "anomalies"
    hostname: default is taurus
    verbose: set to True to print the levels and variable being processed.

    RETURNS:
    AA, XclimR, lat, lon, lev, DR where AA holds the anomalies (time is the
    last dimension unless only one date was requested), XclimR is the
    climatology that was subtracted (None if climatology_option is None),
    lat/lon/lev are the coordinates of the model fields, and DR is the
    daterange found for the climatology.

    SIDE EFFECTS:
    E['daterange'] is overwritten if the climatology covers a different
    number of dates, and is made daily for the 'F_W4_L66' climatology.

    RAISES:
    ValueError if no model field could be loaded, or if the time dimension
    of the climatology is neither its first nor its last dimension.
    """
    # load climatology
    Xclim, lat, lon, lev, DR = load_climatology(E, climatology_option, hostname)

    # change the daterange in the anomalies to suit what was found for climatology
    if len(DR) != len(E['daterange']):
        print('Changing the experiment daterange to the dates found for the requested climatology')
        E['daterange'] = DR
        d1 = DR[0].strftime("%Y-%m-%d")
        d2 = DR[len(DR) - 1].strftime("%Y-%m-%d")
        print('new daterange goes from ' + d1 + ' to ' + d2)

    # some climatologies are only available at daily resolution, so
    # in that case we have to change the daterange in E to be daily
    if climatology_option == 'F_W4_L66':
        d0 = E['daterange'][0]
        df = E['daterange'][len(E['daterange']) - 1]
        days = df - d0
        E['daterange'] = dart.daterange(date_start=d0, periods=days.days + 1, DT='1D')

    # load the desired model fields for the experiment
    Xlist = []  # holds the fields we retrieve for every date
    for date in E['daterange']:
        X, lat0, lon0, lev0 = DSS.compute_DART_diagn_from_model_h_files(
            E, date, hostname=hostname, verbose=verbose)
        if X is not None:
            Xlist.append(np.squeeze(X))
            # from here on use the coordinates of the model fields
            lat = lat0
            lon = lon0
            lev = lev0

    # without any field there is nothing to stack or subtract
    if not Xlist:
        raise ValueError('ano: no model fields found for the requested daterange')

    # check that the right vertical levels were loaded
    if verbose:
        print('------computing daily anomalies for the following vertical levels and variable:-------')
        print(lev)
        print(E['variable'])

    # compute anomalies:
    # for this we stack the model fields along a new last (time) dimension
    XX = np.concatenate([X[..., np.newaxis] for X in Xlist], axis=-1)

    if climatology_option is None:
        # nothing is subtracted, so there is no reshaped climatology to return
        return XX, None, lat, lon, lev, DR

    # if the climatology does not have shape lat x lon x lev x time,
    # run swapaxes 2x to get it as such
    # NOTE: this is still a kludge and probably wont work with all datasets - check this carefully
    # with your own data
    XclimS = np.squeeze(Xclim)
    # number of requested dates; E['daterange'] is always defined here,
    # whichever climatology was requested
    nT = len(E['daterange'])
    lastdim = len(XclimS.shape) - 1

    if nT == 1:
        # if only retrieving a single date, don't need to do any reshaping
        # but might need to squeeze out a length-one time dimension
        XclimR = XclimS
        XX = np.squeeze(XX)
    else:
        # the time dimension is the one with length nT
        # (if several dimensions match, the last one is taken)
        time_dim = None
        for ii, s in enumerate(XclimS.shape):
            if s == nT:
                time_dim = ii

        if time_dim == lastdim:
            # if time is the last dimension, don't need to reshape Xclim
            XclimR = XclimS
        elif time_dim == 0:
            # if time is the first dimension, need to reshape Xclim
            XclimR = XclimS.swapaxes(0, lastdim).swapaxes(0, 1)
        else:
            raise ValueError('ano: cannot find a first or last dimension of length '
                             + str(nT) + ' in a climatology of shape '
                             + str(XclimS.shape))

    AA = XX - XclimR

    return AA, XclimR, lat, lon, lev, DR
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: the requested daterange -- an indexable sequence of datetime-like
        objects (DR[0] and DR[len(DR)-1] give the first and last year).
    hostname: passed on to es.obs_data_paths and ztrop. Default is taurus.
    debug: set to True to print the files that are read and the resulting
        series. Default is False.

    RETURNS:
    a pandas Series named 'ztrop_mean', indexed by station, holding the mean
    of all tropopause heights found for that station over the whole
    daterange. Stations without any usable sounding get NaN.

    SIDE EFFECTS:
    writes the series to
    <HRRS data dir>/mean_tropopause_height_per_station_<first>-<last>.csv
    """
    import calendar
    from TIL import ztrop

    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights per station, accumulated over ALL years of the
    # daterange (resetting this per year would discard every year but the last)
    ztrop_per_station = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # compute the subset of the requested daterange that fits into this
        # year; leap years have 366 days, otherwise Dec 31 would be dropped
        n_days = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=n_days * 4, DT='6H')
        # sorted so that files are visited in a reproducible order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:

            # list of tropopause heights for this station (shared across years)
            ztrop_list = ztrop_per_station.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data; HRRS_as_DF treats a None result
                # from this reader as "no data", so do the same here
                D = read_HRRS_data(ff)
                if D is None:
                    continue

                # compute tropopause height
                z = D['Alt'] / 1E3      # Altitude in km
                T = D['Temp'] + 273.15  # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    ztrop_list.append(ztropp)

    # average the tropopause heights per station; an empty list gives NaN
    # explicitly instead of relying on np.mean([]) and its runtime warning
    ztrop_dict = dict()
    for s, heights in ztrop_per_station.items():
        ztrop_dict[s] = np.mean(heights) if heights else np.nan

    # turn dict into a pandas series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')

    if debug:
        print(ZT)

    # store the series as a csv file next to the HRRS data
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime("%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return ZT
def HRRS_as_DF(OBS, TPbased=False, TPbased_vertical_res=50E-3, hostname='taurus', debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and
    return the available high-resolution radiosonde data as a pandas data frame.

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but
        the DART/model specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced
        altitudes relative to the tropopause - default is False.
    TPbased_vertical_res: resolution of the grid to which we interpolate the
        obs when doing TP-based coordinates. Default is 50m (given in km).
    hostname: default is taurus
    debug: set to True to print the files that are read. Default is False.

    RETURNS:
    a single data frame holding all soundings found, with the added columns
    'Date' and 'StationNumber', altitude in km and temperature in Kelvin.

    RAISES:
    ValueError if no sounding at all was found for the request.
    """
    import calendar

    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # bounds of the requested region
    lat_min, lat_max = OBS['latrange'][0], OBS['latrange'][1]
    lon_min, lon_max = OBS['lonrange'][0], OBS['lonrange'][1]

    # list which will hold the data frames for each station and time
    DFlist = []

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # trim list down to the stations inside the lat/lon box
        # (one station lookup per station instead of four)
        stations_latlon = []
        for s in Slist:
            station = stationdata.loc[int(s)]
            if lat_min <= station['Lat'] <= lat_max and lon_min <= station['Lon'] <= lon_max:
                stations_latlon.append(s)

        # compute the subset of the requested daterange that fits into this
        # year; leap years have 366 days, otherwise Dec 31 would be dropped
        n_days = 366 if calendar.isleap(YYYY) else 365
        year_daterange = dart.daterange(date_start=datetime.datetime(YYYY, 1, 1, 0, 0, 0),
                                        periods=n_days * 4, DT='6H')
        # sorted so that the rows of the output come in a reproducible order
        DR2 = sorted(set(year_daterange).intersection(DR))

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff, vertical_res_km=TPbased_vertical_res)
                    alt_to_km = 1.0     # here the altitude is already in km
                    temp_to_K = 0.0
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3  # raw data are in m -- convert to km
                    temp_to_K = 273.15  # raw data need to be converted to kelvin

                if D is None:
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                                    'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat raises an uninformative ValueError on an empty list, so
    # raise the same exception type with a message that names the cause
    if not DFlist:
        raise ValueError('HRRS_as_DF: no HRRS data found for the requested '
                         'daterange and lat/lon range')

    # merge the list of data frames into a single DF
    DFout = pd.concat(DFlist, axis=0)

    return DFout
def get_experiment_date_ranges(exp_name):
    """
    Return the stored date range for one of the known DART experiments.

    INPUTS:
    exp_name: name of the experiment (see the table below for known names).

    RETURNS:
    the result of dart.daterange for the stored start date, number of
    periods and time step of the experiment, or None (after printing a
    message) if the experiment name is not known.
    """
    # NOTE(review): this function is defined more than once in this file;
    # Python keeps only the last definition.

    # stored date ranges for various DART experiments:
    # name -> ((year, month, day, hour) of the start date, periods, time step)
    stored = {
        # CAM experiments for ERP assimilation study
        'NODA': ((2009, 1, 1, 0), 31, '1D'),
        'ERPALL': ((2009, 1, 1, 0), 31, '1D'),
        'RST': ((2009, 1, 1, 0), 17, '1D'),
        'ERPRST': ((2009, 1, 1, 0), 17, '1D'),
        # DART-WACCM runs performed at GEOMAR
        'PMO32': ((2009, 10, 1, 6), 31, '6H'),
        'W0910_NODA': ((2009, 10, 1, 12), 596, '6H'),
        'W0910_GLOBAL': ((2009, 10, 1, 12), 596, '6H'),
        'W0910_TROPICS': ((2009, 10, 1, 12), 596, '6H'),
        'W0910_NODART': ((2009, 10, 1, 12), 10, '6H'),
        'W0910_NOSTOP': ((2009, 10, 1, 12), 64, '6H'),
        # WACCM PMO runs performed by Nick Pedatella at NCAR
        'NCAR_PMO_CONTROL': ((2008, 11, 6, 6), 72, '6H'),
        'NCAR_PMO_LAS': ((2008, 11, 6, 6), 72, '6H'),
        'NCAR_PMO_LA': ((2008, 11, 6, 6), 72, '6H'),
        # WACCM real-obs runs performed by Nick Pedatella at NCAR
        'NCAR_FULL': ((2009, 1, 1, 6), 204, '6H'),
        'NCAR_LAONLY': ((2009, 1, 1, 6), 204, '6H'),
    }

    entry = stored.get(exp_name)
    if entry is None:
        # the message names this function (it used to say 'find_paths')
        print('get_experiment_date_ranges: cannot find experiment '
              + str(exp_name) + ', returning None')
        return None

    (year, month, day, hour), periods, DT = entry
    return dart.daterange(date_start=datetime.datetime(year, month, day, hour, 0, 0),
                          periods=periods, DT=DT)