def get_available_date_range(exp_name):
    """
    Given some existing DART experiment, return the daterange of all
    currently available data.

    The ranges of all known experiments are built with dart.daterange (same
    start date and DT='6H', different number of periods) and the one stored
    under exp_name is returned. A name that is not in the table raises
    KeyError.
    """
    first_date = datetime.datetime(2009, 10, 1, 0, 0, 0)

    # experiment name -> number of periods currently available
    periods_per_experiment = (
        ('W0910_GLOBAL', 380),
        ('W0910_NODA', 640),
    )

    available = dict()
    for name, nperiods in periods_per_experiment:
        available[name] = dart.daterange(date_start=first_date,
                                         periods=nperiods,
                                         DT='6H')
    return available[exp_name]
Exemple #2
0
def get_available_date_range(exp_name):
    """
    Look up the daterange of the data currently available for a DART experiment.

    Every known experiment starts on the same date and is built with
    dart.daterange using DT='6H'; only the number of periods differs.
    Asking for an experiment that is not listed raises KeyError.
    """
    start = datetime.datetime(2009, 10, 1, 0, 0, 0)

    # (experiment name, number of available periods)
    known_experiments = [('W0910_GLOBAL', 380), ('W0910_NODA', 640)]

    date_ranges = {
        experiment: dart.daterange(date_start=start, periods=count, DT='6H')
        for experiment, count in known_experiments
    }
    return date_ranges[exp_name]
Exemple #3
0
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: a list of datetime objects giving the desired date range; its first
        and last entries define the years that are searched and the name of
        the csv file
    hostname: passed on to es.obs_data_paths and ztrop - default is taurus
    debug: set to True to print the files found and the resulting series.
        Default is False.

    OUTPUT:
    a pandas Series named 'ztrop_mean', indexed by station, holding the mean
    tropopause height over all soundings found for that station in the whole
    daterange (NaN for stations without any usable sounding). The same series
    is written to
    <HRRS path>/mean_tropopause_height_per_station_<first date>-<last date>.csv
    """
    from TIL import ztrop

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights of all available soundings, collected per station
    # over ALL years of the daterange
    ztrop_lists = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1, 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # The number of days is computed from the calendar so that Dec 31 of
        # leap years is not dropped.
        year_start = datetime.datetime(YYYY, 1, 1, 0, 0, 0)
        days_in_year = (datetime.datetime(YYYY + 1, 1, 1, 0, 0, 0) - year_start).days
        year_daterange = dart.daterange(date_start=year_start, periods=days_in_year * 4, DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:
            # every available station gets an entry, even if no sounding is found
            ztrop_list = ztrop_lists.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                D = read_HRRS_data(ff)

                # compute tropopause height
                z = D['Alt'] / 1E3        # Altitude in km
                T = D['Temp'] + 273.15    # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    ztrop_list.append(ztropp)

    # average the tropopause heights per station; stations without data get NaN
    # (same value np.mean would give for an empty list, but without the warning)
    ztrop_dict = dict()
    for s, ztrop_list in ztrop_lists.items():
        ztrop_dict[s] = np.mean(ztrop_list) if ztrop_list else np.nan

    # turn dict into a pandas series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')

    if debug:
        print(ZT)

    # store the series as csv file; this happens once, after all years were read
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime("%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return(ZT)
Exemple #4
0
def HRRS_as_DF(OBS, TPbased=False, TPbased_vertical_res=50E-3, hostname='taurus', debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde data as a pandas data frame

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the DART/model
            specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause  - default is False.
    hostname: default is taurus
    debug: set to True to print some stuff out. Default is False.
    TPbased_vertical_res: resolution of the grid to which we interpolate the obs doing TP-based
        coordinates. Default is 50m.

    OUTPUT:
    a single data frame, concatenated from the data frames of all soundings found,
    with the added columns 'Date' and 'StationNumber', altitude in km and
    temperature in Kelvin.

    RAISES:
    ValueError if no sounding at all was found for the requested dates and region.
    """

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # initialize an empty list which will hold the data frames for each station and time
    DFlist = []

    latmin, latmax = OBS['latrange'][0], OBS['latrange'][1]
    lonmin, lonmax = OBS['lonrange'][0], OBS['lonrange'][1]

    # columns of the raw data that are not needed in the output
    useless_cols = ['Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir',
                    'Wcmp', 'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1, 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # trim list down to the ones that fit into the latitude and longitude range
        # (one lookup in the station table per station)
        stations_latlon = []
        for s in Slist:
            station = stationdata.loc[int(s)]
            if latmin <= station['Lat'] <= latmax and lonmin <= station['Lon'] <= lonmax:
                stations_latlon.append(s)

        # also compute the subset of the requested daterange that fits into this year.
        # The number of days is computed from the calendar so that Dec 31 of
        # leap years is not dropped.
        year_start = datetime.datetime(YYYY, 1, 1, 0, 0, 0)
        days_in_year = (datetime.datetime(YYYY + 1, 1, 1, 0, 0, 0) - year_start).days
        year_daterange = dart.daterange(date_start=year_start, periods=days_in_year * 4, DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff, vertical_res_km=TPbased_vertical_res)
                    alt_to_km = 1.0      # here the altitude is already in km
                    temp_to_K = 0.0
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3   # raw data are in m -- convert to km
                    temp_to_K = 273.15   # raw data need to be converted to kelvin

                if D is None:
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat cannot merge an empty list -- say what actually went wrong
    if not DFlist:
        raise ValueError('HRRS_as_DF: no HRRS data found for the requested daterange and lat/lon range')

    # merge the list of data frames into a single DF
    DFout = pd.concat(DFlist, axis=0)

    return(DFout)
def get_experiment_date_ranges(exp_name):
    """
    Return the stored date range of a DART experiment.

    INPUTS:
    exp_name: name of the experiment, e.g. 'NODA' or 'W0910_GLOBAL'

    OUTPUT:
    the result of dart.daterange(date_start=..., periods=..., DT=...) for the
    requested experiment, or None (after printing a message) if the experiment
    name is not known.
    """
    dt = datetime.datetime

    # stored date ranges for various DART experiments:
    # experiment name -> (start date, number of periods, time step)
    stored = {
        # CAM experiments for ERP assimilation study
        'NODA': (dt(2009, 1, 1, 0, 0, 0), 31, '1D'),
        'ERPALL': (dt(2009, 1, 1, 0, 0, 0), 31, '1D'),
        'RST': (dt(2009, 1, 1, 0, 0, 0), 17, '1D'),
        'ERPRST': (dt(2009, 1, 1, 0, 0, 0), 17, '1D'),

        # DART-WACCM runs performed at GEOMAR
        'PMO32': (dt(2009, 10, 1, 6, 0, 0), 31, '6H'),
        'W0910_NODA': (dt(2009, 10, 1, 12, 0, 0), 596, '6H'),
        'W0910_GLOBAL': (dt(2009, 10, 1, 12, 0, 0), 596, '6H'),
        'W0910_TROPICS': (dt(2009, 10, 1, 12, 0, 0), 596, '6H'),
        'W0910_NODART': (dt(2009, 10, 1, 12, 0, 0), 10, '6H'),
        'W0910_NOSTOP': (dt(2009, 10, 1, 12, 0, 0), 64, '6H'),

        # WACCM PMO runs performed by Nick Pedatella at NCAR
        'NCAR_PMO_CONTROL': (dt(2008, 11, 6, 6, 0, 0), 72, '6H'),
        'NCAR_PMO_LAS': (dt(2008, 11, 6, 6, 0, 0), 72, '6H'),
        'NCAR_PMO_LA': (dt(2008, 11, 6, 6, 0, 0), 72, '6H'),

        # WACCM real-obs runs performed by Nick Pedatella at NCAR
        'NCAR_FULL': (dt(2009, 1, 1, 6, 0, 0), 204, '6H'),
        'NCAR_LAONLY': (dt(2009, 1, 1, 6, 0, 0), 204, '6H'),
    }

    try:
        entry = stored.get(exp_name)
    except TypeError:
        # an unhashable name (e.g. a list) cannot match any experiment
        entry = None

    if entry is None:
        # str() so that a non-string name is reported instead of crashing here
        print('get_experiment_date_ranges Cannot find experiment ' + str(exp_name) + ' returning...')
        return None

    date_start, periods, DT = entry
    return dart.daterange(date_start=date_start, periods=periods, DT=DT)
Exemple #6
0
def ano(E, climatology_option='NODA', hostname='taurus', verbose=False):
    """
    Compute anomaly fields relative to some climatology

    Inputs allowed for climatology_option:
    'NODA': take the ensemble mean of the corresponding no-DA experiment as a 40-year climatology
    'F_W4_L66': daily climatology of a CESM+WACCM simulation with realistic forcings, 1951-2010
    None: don't subtract out anything -- just return the regular fields in the same shape as other "anomalies"

    Other inputs:
    E: a DART experiment dictionary; 'daterange' and 'variable' are read here, and
        E['daterange'] is CHANGED IN PLACE if the climatology covers a different
        number of dates, or (for 'F_W4_L66') to make it daily.
    hostname: passed on to the loading routines - default is taurus
    verbose: set to True to print the levels and variable that were loaded.

    Returns AA,XclimR,lat,lon,lev,DR where AA holds the model fields (time is the
    last dimension) minus the climatology, XclimR is the climatology as it was
    subtracted (None if climatology_option is None), lat,lon,lev are taken from
    the last model field that could be loaded, and DR is the daterange returned
    by load_climatology.

    Raises ValueError if no model field could be loaded, or if the time dimension
    of the climatology is neither its first nor its last dimension.
    """

    # load climatology
    Xclim, lat, lon, lev, DR = load_climatology(E, climatology_option, hostname)

    # change the daterange in the anomalies to suit what was found for climatology
    if len(DR) != len(E['daterange']):
        print('Changing the experiment daterange to the dates found for the requested climatology')
        E['daterange'] = DR
        d1 = DR[0].strftime("%Y-%m-%d")
        d2 = DR[len(DR) - 1].strftime("%Y-%m-%d")
        print('new daterange goes from '+d1+' to '+d2)

    # some climatologies are only available at daily resolution, so
    # in that case we have to change the daterange in E to be daily
    if climatology_option == 'F_W4_L66':
        d0 = E['daterange'][0]
        df = E['daterange'][len(E['daterange']) - 1]
        days = df - d0
        E['daterange'] = dart.daterange(date_start=d0, periods=days.days + 1, DT='1D')

    # load the desired model fields for the experiment
    Xlist = []    # empty list to hold the fields we retrieve for every day
    for date in E['daterange']:
        X, lat0, lon0, lev0 = DSS.compute_DART_diagn_from_model_h_files(E, date, hostname=hostname, verbose=verbose)
        if X is not None:
            Xlist.append(np.squeeze(X))
            lat = lat0
            lon = lon0
            lev = lev0

    if not Xlist:
        raise ValueError('ano: no model fields could be loaded for the requested daterange')

    # check that the right vertical levels were loaded
    if verbose:
        print('------computing daily anomalies for the following vertical levels and variable:-------')
        print(lev)
        print(E['variable'])

    # compute anomalies:
    # for this we turn the model fields into a matrix (time as new last dimension)
    # and subtract the climatology from it
    XX = np.concatenate([X[..., np.newaxis] for X in Xlist], axis=-1)

    if climatology_option is None:
        # nothing to subtract, so there is no climatology to hand back either
        return XX, None, lat, lon, lev, DR

    # if the climatology does not have shape lat x lon x lev x time,
    # run swapaxes 2x to get it as such
    # NOTE: this is still a kludge and probably wont work with all datasets - check this carefully
    # with your own data
    XclimS = np.squeeze(Xclim)

    # number of dates requested; this is the (possibly updated) experiment
    # daterange for every climatology option, not only for 'F_W4_L66'
    nT = len(E['daterange'])

    if nT == 1:
        # if only retrieving a single date, don't need to do any reshaping
        # but might need to squeeze out a length-one time dimension
        XclimR = XclimS
        XX = np.squeeze(XX)
    else:
        lastdim = len(XclimS.shape) - 1

        # the time dimension is the last dimension whose length matches nT
        matching_dims = [ii for ii, s in enumerate(XclimS.shape) if s == nT]
        if not matching_dims:
            raise ValueError('ano: the climatology has no dimension of length '+str(nT)+' (the number of dates)')
        time_dim = matching_dims[-1]

        if time_dim == lastdim:
            # if time is the last dimension, don't need to reshape Xclim
            XclimR = XclimS
        elif time_dim == 0:
            # if time is the first dimension, need to reshape Xclim
            Xclim2 = XclimS.swapaxes(0, lastdim)
            XclimR = Xclim2.swapaxes(0, 1)
        else:
            raise ValueError('ano: the time dimension of the climatology has to be its first or last dimension')

    AA = XX - XclimR

    return AA, XclimR, lat, lon, lev, DR
Exemple #7
0
def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: a list of datetime objects giving the desired date range; its first
        and last entries define the years that are searched and the name of
        the csv file
    hostname: passed on to es.obs_data_paths and ztrop - default is taurus
    debug: set to True to print the files found and the resulting series.
        Default is False.

    OUTPUT:
    a pandas Series named 'ztrop_mean', indexed by station, holding the mean
    tropopause height over all soundings found for that station in the whole
    daterange (NaN for stations without any usable sounding). The same series
    is written to
    <HRRS path>/mean_tropopause_height_per_station_<first date>-<last date>.csv
    """
    from TIL import ztrop

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights of all available soundings, collected per station
    # over ALL years of the daterange
    ztrop_lists = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1, 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # The number of days is computed from the calendar so that Dec 31 of
        # leap years is not dropped.
        year_start = datetime.datetime(YYYY, 1, 1, 0, 0, 0)
        year_end = datetime.datetime(YYYY + 1, 1, 1, 0, 0, 0)
        days_in_year = (year_end - year_start).days
        year_daterange = dart.daterange(date_start=year_start,
                                        periods=days_in_year * 4,
                                        DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:
            # every available station gets an entry, even if no sounding is found
            ztrop_list = ztrop_lists.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(
                    s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                D = read_HRRS_data(ff)

                # compute tropopause height
                z = D['Alt'] / 1E3  # Altitude in km
                T = D['Temp'] + 273.15  # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    ztrop_list.append(ztropp)

    # average the tropopause heights per station; stations without data get NaN
    # (same value np.mean would give for an empty list, but without the warning)
    ztrop_dict = dict()
    for s, ztrop_list in ztrop_lists.items():
        ztrop_dict[s] = np.mean(ztrop_list) if ztrop_list else np.nan

    # turn dict into a pandas series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')

    if debug:
        print(ZT)

    # store the series as csv file; this happens once, after all years were read
    datestr = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime(
        "%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + datestr
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return (ZT)
Exemple #8
0
def HRRS_as_DF(OBS,
               TPbased=False,
               TPbased_vertical_res=50E-3,
               hostname='taurus',
               debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde data as a pandas data frame

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the DART/model
            specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause  - default is False.
    hostname: default is taurus
    debug: set to True to print some stuff out. Default is False.
    TPbased_vertical_res: resolution of the grid to which we interpolate the obs doing TP-based
        coordinates. Default is 50m.

    OUTPUT:
    a single data frame, concatenated from the data frames of all soundings found,
    with the added columns 'Date' and 'StationNumber', altitude in km and
    temperature in Kelvin.

    RAISES:
    ValueError if no sounding at all was found for the requested dates and region.
    """

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # initialize an empty list which will hold the data frames for each station and time
    DFlist = []

    latmin, latmax = OBS['latrange'][0], OBS['latrange'][1]
    lonmin, lonmax = OBS['lonrange'][0], OBS['lonrange'][1]

    # columns of the raw data that are not needed in the output
    useless_cols = [
        'Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir', 'Wcmp', 'Ele',
        'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ'
    ]

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1, 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # trim list down to the ones that fit into the latitude and longitude range
        # (one lookup in the station table per station)
        stations_latlon = []
        for s in Slist:
            station = stationdata.loc[int(s)]
            if (latmin <= station['Lat'] <= latmax
                    and lonmin <= station['Lon'] <= lonmax):
                stations_latlon.append(s)

        # also compute the subset of the requested daterange that fits into this year.
        # The number of days is computed from the calendar so that Dec 31 of
        # leap years is not dropped.
        year_start = datetime.datetime(YYYY, 1, 1, 0, 0, 0)
        year_end = datetime.datetime(YYYY + 1, 1, 1, 0, 0, 0)
        days_in_year = (year_end - year_start).days
        year_daterange = dart.daterange(date_start=year_start,
                                        periods=days_in_year * 4,
                                        DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(
                    s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(
                        ff, vertical_res_km=TPbased_vertical_res)
                    alt_to_km = 1.0  # here the altitude is already in km
                    temp_to_K = 0.0
                else:
                    D = read_HRRS_data(ff)
                    alt_to_km = 1.0E-3  # raw data are in m -- convert to km
                    temp_to_K = 273.15  # raw data need to be converted to kelvin

                if D is None:
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat cannot merge an empty list -- say what actually went wrong
    if not DFlist:
        raise ValueError(
            'HRRS_as_DF: no HRRS data found for the requested daterange and lat/lon range'
        )

    # merge the list of data frames into a single DF
    DFout = pd.concat(DFlist, axis=0)

    return (DFout)
Exemple #9
0
def get_experiment_date_ranges(exp_name):
    """
    Return the stored date range of a DART experiment.

    INPUTS:
    exp_name: name of the experiment, e.g. 'NODA' or 'W0910_GLOBAL'

    OUTPUT:
    the result of dart.daterange(date_start=..., periods=..., DT=...) for the
    requested experiment, or None (after printing a message) if the experiment
    name is not known.
    """
    dt = datetime.datetime

    # start dates shared by several experiments
    cam_start = dt(2009, 1, 1, 0, 0, 0)
    waccm_start = dt(2009, 10, 1, 12, 0, 0)
    ncar_pmo_start = dt(2008, 11, 6, 6, 0, 0)
    ncar_obs_start = dt(2009, 1, 1, 6, 0, 0)

    # stored date ranges for various DART experiments:
    # experiment name -> (start date, number of periods, time step)
    stored = {
        # CAM experiments for ERP assimilation study
        'NODA': (cam_start, 31, '1D'),
        'ERPALL': (cam_start, 31, '1D'),
        'RST': (cam_start, 17, '1D'),
        'ERPRST': (cam_start, 17, '1D'),

        # DART-WACCM runs performed at GEOMAR
        'PMO32': (dt(2009, 10, 1, 6, 0, 0), 31, '6H'),
        'W0910_NODA': (waccm_start, 596, '6H'),
        'W0910_GLOBAL': (waccm_start, 596, '6H'),
        'W0910_TROPICS': (waccm_start, 596, '6H'),
        'W0910_NODART': (waccm_start, 10, '6H'),
        'W0910_NOSTOP': (waccm_start, 64, '6H'),

        # WACCM PMO runs performed by Nick Pedatella at NCAR
        'NCAR_PMO_CONTROL': (ncar_pmo_start, 72, '6H'),
        'NCAR_PMO_LAS': (ncar_pmo_start, 72, '6H'),
        'NCAR_PMO_LA': (ncar_pmo_start, 72, '6H'),

        # WACCM real-obs runs performed by Nick Pedatella at NCAR
        'NCAR_FULL': (ncar_obs_start, 204, '6H'),
        'NCAR_LAONLY': (ncar_obs_start, 204, '6H'),
    }

    try:
        entry = stored.get(exp_name)
    except TypeError:
        # an unhashable name (e.g. a list) cannot match any experiment
        entry = None

    if entry is None:
        # str() so that a non-string name is reported instead of crashing here
        print('get_experiment_date_ranges Cannot find experiment ' +
              str(exp_name) + ' returning...')
        return None

    date_start, periods, DT = entry
    return dart.daterange(date_start=date_start, periods=periods, DT=DT)