Python obs_data_paths Examples, experiment_settings.obs_data_paths Python Examples

Example #1

0

Show file

def read_COSMIC_TPbased_mean(E, hostname='taurus'):
    """
    Read in COSMIC temp and N2 data averaged with respect to the local tropopause.

    INPUTS:
    E: experiment dictionary. E['variable'] selects what is read and must be
        'T' or 'Nsq'; any other value raises a KeyError.
    hostname: passed on to es.obs_data_paths to find the COSMIC data directory.
        Default is taurus.

    OUTPUT:
    A dictionary with the entries
        z: the contents of the file's 'z' variable
        data: the requested variable (converted to Kelvin if the file's units
            string mentions Celsius)
        units: the units string that goes with 'data'
    """

    from netCDF4 import Dataset

    # find the path to the data
    datadir = es.obs_data_paths('COSMIC', hostname)

    # TODO: right now this loads the only file I have. Later can code a dynamic way to
    # choose files based on what is specified in E.
    ff = 'mean_GPS-RO_45-60N_COSMIC_Jan2010_TPbased_mean.nc'
    filename = datadir + ff

    # translate the requested variable into the name used inside the file;
    # done before opening the file so a bad request cannot leave it open
    varnames = {'T': 'T', 'Nsq': 'N2'}
    variable = varnames[E['variable']]

    # open the file and read in the relevant data into dict
    D = dict()
    f = Dataset(filename, 'r')
    try:
        ncvar = f.variables[variable]
        D['z'] = f.variables['z'][:]
        D['data'] = ncvar[:]
        D['units'] = ncvar.units

        # one last thing! Convert Celsius to Kelvin (0 degC = 273.15 K)
        if 'Celsius' in D['units']:
            D['data'] = D['data'] + 273.15
            D['units'] = 'K'
    finally:
        # close the file even if one of the reads above failed
        f.close()

    return D

Example #2

0

Show file

File: OBS.py Project: LisaNeef/DART-state-space

def HRRS_station_data(hostname='taurus'):
    """
    Read in information about the high-res radiosondes and return it as a pandas dataframe.

    INPUTS:
    hostname: passed on to es.obs_data_paths to find the HRRS data directory.
        Default is taurus.

    OUTPUT:
    A dataframe indexed by WBAN, read from ListOfStations.dat in the HRRS data
    directory. Malformed lines are skipped, and Lat/Lon entries that cannot be
    parsed as numbers are turned into NaN.
    """
    import os

    datadir = es.obs_data_paths('HRRS', hostname)

    # os.path.join works whether or not datadir ends in a path separator
    ff = os.path.join(datadir, 'ListOfStations.dat')
    colnames = ['WBAN', 'Station_Name', 'State', 'Country', 'WMO_Code',
                'Lat', 'Lon', 'Height', 'Transition date']
    read_args = dict(delimiter=",", skiprows=1, names=colnames, index_col='WBAN')

    # pandas 1.3 introduced on_bad_lines='skip' as the replacement for
    # error_bad_lines=False, and pandas 2.0 removed the old keyword.
    # Older versions reject the new keyword with a TypeError, so fall back.
    try:
        stations = pd.read_csv(ff, on_bad_lines='skip', **read_args)
    except TypeError:
        stations = pd.read_csv(ff, error_bad_lines=False, **read_args)

    # a few columns have to be coerced to numeric
    stations[['Lat', 'Lon']] = stations[['Lat', 'Lon']].apply(pd.to_numeric, errors='coerce')

    return stations

Example #3

0

Show file

def HRRS_station_data(hostname='taurus'):
    """
    Read in information about the high-res radiosondes and return it as a pandas dataframe.

    INPUTS:
    hostname: passed on to es.obs_data_paths to find the HRRS data directory.
        Default is taurus.

    OUTPUT:
    A dataframe indexed by WBAN, read from ListOfStations.dat in the HRRS data
    directory. Malformed lines are skipped, and Lat/Lon entries that cannot be
    parsed as numbers are turned into NaN.
    """
    import os

    datadir = es.obs_data_paths('HRRS', hostname)

    # os.path.join works whether or not datadir ends in a path separator
    ff = os.path.join(datadir, 'ListOfStations.dat')
    colnames = [
        'WBAN', 'Station_Name', 'State', 'Country', 'WMO_Code', 'Lat', 'Lon',
        'Height', 'Transition date'
    ]
    read_args = dict(delimiter=",",
                     skiprows=1,
                     names=colnames,
                     index_col='WBAN')

    # pandas 1.3 introduced on_bad_lines='skip' as the replacement for
    # error_bad_lines=False, and pandas 2.0 removed the old keyword.
    # Older versions reject the new keyword with a TypeError, so fall back.
    try:
        stations = pd.read_csv(ff, on_bad_lines='skip', **read_args)
    except TypeError:
        stations = pd.read_csv(ff, error_bad_lines=False, **read_args)

    # a few columns have to be coerced to numeric
    stations[['Lat', 'Lon']] = stations[['Lat', 'Lon']].apply(pd.to_numeric,
                                                              errors='coerce')

    return stations

Example #4

0

Show file

File: OBS.py Project: LisaNeef/DART-state-space

def HRRS_mean_ztrop_to_csv(DR,hostname='taurus',debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: a non-empty list of datetime objects that give the desired date range
    hostname: passed on to es.obs_data_paths and ztrop. Default is taurus.
    debug: set to True to print some stuff out. Default is False.

    OUTPUT:
    A pandas Series named 'ztrop_mean', indexed by station, holding the mean
    of the tropopause heights computed from every profile found for that
    station within DR (NaN if no usable profile was found). The same series
    is written to mean_tropopause_height_per_station_<first>-<last>.csv in
    the HRRS data directory.
    """
    from TIL import ztrop

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS',hostname)

    # tropopause heights for all available obs, collected per station over
    # ALL years of the daterange before anything is averaged or returned
    ztrop_lists = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR)-1].year
    for YYYY in range(y0,yf+1,1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # The number of 6-hourly steps follows the real length of the year, so
        # that Dec 31 of a leap year is not dropped.
        year_start = datetime.datetime(YYYY,1,1,0,0,0)
        days_in_year = (datetime.datetime(YYYY+1,1,1,0,0,0)-year_start).days
        year_daterange = dart.daterange(date_start=year_start, periods=days_in_year*4, DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:
            # every available station gets an entry, even if no file is found
            heights = ztrop_lists.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir+'/'+str(YYYY)+'/'+str(s)+'/'+str(s)+'-'+datestr+'_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data; skip files that yield nothing
                # (HRRS_as_DF guards against a None result in the same way)
                D = read_HRRS_data(ff)
                if D is None:
                    continue

                # compute tropopause height
                z = D['Alt']/1E3       # Altitude in km
                T = D['Temp']+273.15   # Temp in Kelvin
                ztropp = ztrop(z=z,T=T,debug=debug,hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    heights.append(ztropp)

    # average the tropopause heights per station; stations without any
    # usable profile get NaN (without numpy's empty-mean warning)
    ztrop_dict = {s: (np.mean(h) if h else np.nan) for s, h in ztrop_lists.items()}

    # turn dict into a series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')

    if debug:
        print(ZT)

    # store the series as a csv file
    file_dates = DR[0].strftime("%Y%m%d")+'-'+DR[len(DR)-1].strftime("%Y%m%d")+'.csv'
    fname = datadir+'/'+'mean_tropopause_height_per_station_'+file_dates
    print('storing file '+fname)
    ZT.to_csv(fname, index=True, sep=',',header=True)

    return ZT

Example #5

0

Show file

File: OBS.py Project: LisaNeef/DART-state-space

def HRRS_as_DF(OBS,TPbased=False,TPbased_vertical_res=50E-3,hostname='taurus',debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde data as a pandas data frame

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the DART/model
            specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause  - default is False.
    TPbased_vertical_res: resolution (in km) of the grid to which we interpolate the obs
        when doing TP-based coordinates. Default is 50m.
    hostname: default is taurus
    debug: set to True to print some stuff out. Default is False.

    OUTPUT:
    A single data frame made by stacking the data frames of all files found,
    with added 'Date' and 'StationNumber' columns, 'Alt' in km and 'Temp' in
    Kelvin. If no file is found at all, an empty data frame is returned.
    """

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS',hostname)

    # unit conversions depend only on which reader is used
    if TPbased:
        alt_to_km = 1.0       # here the altitude is already in km
        temp_to_K = 0.0
    else:
        alt_to_km = 1.0E-3    # raw data are in m -- convert to km
        temp_to_K = 273.15    # raw data need to be converted to kelvin

    # columns of the raw data that are not needed in the output
    useless_cols = ['Time','Dewpt','RH','Ucmp','Vcmp','spd','dir',
            'Wcmp',  'Ele', 'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ']

    lat_min = OBS['latrange'][0]
    lat_max = OBS['latrange'][1]
    lon_min = OBS['lonrange'][0]
    lon_max = OBS['lonrange'][1]

    def _in_requested_box(s):
        # True if station s lies inside the requested lat/lon range
        row = stationdata.loc[int(s)]
        return (row['Lat'] >= lat_min and row['Lat'] <= lat_max
                and row['Lon'] >= lon_min and row['Lon'] <= lon_max)

    # initialize an empty list which will hold the data frames for each station and time
    DFlist = []

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR)-1].year
    for YYYY in range(y0,yf+1,1):

        # load a list of the available stations for that year and trim it
        # down to the ones that fit into the latitude and longitude range
        Slist = HRRS_stations_available_per_year(YYYY)
        stations_latlon = [s for s in Slist if _in_requested_box(s)]

        # also compute the subset of the requested daterange that fits into this year.
        # The number of 6-hourly steps follows the real length of the year, so
        # that Dec 31 of a leap year is not dropped.
        year_start = datetime.datetime(YYYY,1,1,0,0,0)
        days_in_year = (datetime.datetime(YYYY+1,1,1,0,0,0)-year_start).days
        year_daterange = dart.daterange(date_start=year_start, periods=days_in_year*4, DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir+'/'+str(YYYY)+'/'+str(s)+'/'+str(s)+'-'+datestr+'_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(ff,vertical_res_km=TPbased_vertical_res)
                else:
                    D = read_HRRS_data(ff)

                if D is None:
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt']*alt_to_km
                D['Temp'] = D['Temp']+temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    D.drop(useless_cols,inplace=True,axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat raises a ValueError on an empty list, so hand back an empty
    # data frame when nothing was found
    if not DFlist:
        return pd.DataFrame()

    # merge the list of data frames into a single DF
    return pd.concat(DFlist, axis=0)

Example #6

0

Show file

def HRRS_mean_ztrop_to_csv(DR, hostname='taurus', debug=False):
    """
    Given a certain daterange, retrieve available high res radiosonde data,
    compute the average tropopause height per station, and store in a
    csv file.

    INPUTS:
    DR: a non-empty list of datetime objects that give the desired date range
    hostname: passed on to es.obs_data_paths and ztrop. Default is taurus.
    debug: set to True to print some stuff out. Default is False.

    OUTPUT:
    A pandas Series named 'ztrop_mean', indexed by station, holding the mean
    of the tropopause heights computed from every profile found for that
    station within DR (NaN if no usable profile was found). The same series
    is written to mean_tropopause_height_per_station_<first>-<last>.csv in
    the HRRS data directory.
    """
    from TIL import ztrop

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # tropopause heights for all available obs, collected per station over
    # ALL years of the daterange before anything is averaged or returned
    ztrop_lists = dict()

    # because the HRRS data are sorted by years, loop over the years in the daterange
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1, 1):

        # load a list of the available stations for that year
        Slist = HRRS_stations_available_per_year(YYYY)

        # also compute the subset of the requested daterange that fits into this year.
        # The number of 6-hourly steps follows the real length of the year, so
        # that Dec 31 of a leap year is not dropped.
        year_start = datetime.datetime(YYYY, 1, 1, 0, 0, 0)
        days_in_year = (datetime.datetime(YYYY + 1, 1, 1, 0, 0, 0) -
                        year_start).days
        year_daterange = dart.daterange(date_start=year_start,
                                        periods=days_in_year * 4,
                                        DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in Slist:
            # every available station gets an entry, even if no file is found
            heights = ztrop_lists.setdefault(s, [])

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(
                    s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data; skip files that yield nothing
                # (HRRS_as_DF guards against a None result in the same way)
                D = read_HRRS_data(ff)
                if D is None:
                    continue

                # compute tropopause height
                z = D['Alt'] / 1E3  # Altitude in km
                T = D['Temp'] + 273.15  # Temp in Kelvin
                ztropp = ztrop(z=z, T=T, debug=debug, hostname=hostname)

                # add to list if not none
                if ztropp is not None:
                    heights.append(ztropp)

    # average the tropopause heights per station; stations without any
    # usable profile get NaN (without numpy's empty-mean warning)
    ztrop_dict = {
        s: (np.mean(h) if h else np.nan)
        for s, h in ztrop_lists.items()
    }

    # turn dict into a series
    ZT = pd.Series(data=ztrop_dict, name='ztrop_mean')

    if debug:
        print(ZT)

    # store the series as a csv file
    file_dates = DR[0].strftime("%Y%m%d") + '-' + DR[len(DR) - 1].strftime(
        "%Y%m%d") + '.csv'
    fname = datadir + '/' + 'mean_tropopause_height_per_station_' + file_dates
    print('storing file ' + fname)
    ZT.to_csv(fname, index=True, sep=',', header=True)

    return ZT

Example #7

0

Show file

def HRRS_as_DF(OBS,
               TPbased=False,
               TPbased_vertical_res=50E-3,
               hostname='taurus',
               debug=False):
    """
    Loop over a set of dates and a specified latitude- and longitude range, and return
    the available high-resolution radiosonde data as a pandas data frame

    INPUTS:
    OBS: a dictionary with the following entries:
        daterange: a list of datetime objects that give the desired date range
        latrange: a list giving the bounding latitudes of the desired range
        lonrange: a list giving the bounding longitudes of the desired range
        Note that OBS can be a DART experiment dictionary (see DART.py), but the DART/model
            specific entries are ignored.
    TPbased: set to True to return the profiles ordered into regularly-spaced altitudes
        relative to the tropopause  - default is False.
    TPbased_vertical_res: resolution (in km) of the grid to which we interpolate the obs
        when doing TP-based coordinates. Default is 50m.
    hostname: default is taurus
    debug: set to True to print some stuff out. Default is False.

    OUTPUT:
    A single data frame made by stacking the data frames of all files found,
    with added 'Date' and 'StationNumber' columns, 'Alt' in km and 'Temp' in
    Kelvin. If no file is found at all, an empty data frame is returned.
    """

    # first read in station information as a dataframe
    stationdata = HRRS_station_data(hostname)

    # the dir where the station data live does not depend on the year
    datadir = es.obs_data_paths('HRRS', hostname)

    # unit conversions depend only on which reader is used
    if TPbased:
        alt_to_km = 1.0  # here the altitude is already in km
        temp_to_K = 0.0
    else:
        alt_to_km = 1.0E-3  # raw data are in m -- convert to km
        temp_to_K = 273.15  # raw data need to be converted to kelvin

    # columns of the raw data that are not needed in the output
    useless_cols = [
        'Time', 'Dewpt', 'RH', 'Ucmp', 'Vcmp', 'spd', 'dir', 'Wcmp', 'Ele',
        'Azi', 'Qp', 'Qt', 'Qrh', 'Qu', 'Qv', 'QdZ'
    ]

    lat_min = OBS['latrange'][0]
    lat_max = OBS['latrange'][1]
    lon_min = OBS['lonrange'][0]
    lon_max = OBS['lonrange'][1]

    def _in_requested_box(s):
        # True if station s lies inside the requested lat/lon range
        row = stationdata.loc[int(s)]
        return (row['Lat'] >= lat_min and row['Lat'] <= lat_max
                and row['Lon'] >= lon_min and row['Lon'] <= lon_max)

    # initialize an empty list which will hold the data frames for each station and time
    DFlist = []

    # because the HRRS data are sorted by years, loop over the years in the daterange
    DR = OBS['daterange']
    y0 = DR[0].year
    yf = DR[len(DR) - 1].year
    for YYYY in range(y0, yf + 1, 1):

        # load a list of the available stations for that year and trim it
        # down to the ones that fit into the latitude and longitude range
        Slist = HRRS_stations_available_per_year(YYYY)
        stations_latlon = [s for s in Slist if _in_requested_box(s)]

        # also compute the subset of the requested daterange that fits into this year.
        # The number of 6-hourly steps follows the real length of the year, so
        # that Dec 31 of a leap year is not dropped.
        year_start = datetime.datetime(YYYY, 1, 1, 0, 0, 0)
        days_in_year = (datetime.datetime(YYYY + 1, 1, 1, 0, 0, 0) -
                        year_start).days
        year_daterange = dart.daterange(date_start=year_start,
                                        periods=days_in_year * 4,
                                        DT='6H')
        DR2 = set(year_daterange).intersection(DR)

        # now loop over available stations, and for each one, retrieve the data
        # that fit into the requested daterange
        for s in stations_latlon:

            # loop over dates, and retrieve data if available
            for dd in DR2:
                datestr = dd.strftime("%Y%m%d%H")
                ff = datadir + '/' + str(YYYY) + '/' + str(s) + '/' + str(
                    s) + '-' + datestr + '_mod.dat'
                if not os.path.exists(ff):
                    continue

                if debug:
                    print(ff)

                # read in the station data
                if TPbased:
                    D = TP_based_HRRS_data(
                        ff, vertical_res_km=TPbased_vertical_res)
                else:
                    D = read_HRRS_data(ff)

                if D is None:
                    continue

                # also add a column holding the date
                D['Date'] = pd.Series(dd, index=D.index)

                # also add a column holding the station number
                D['StationNumber'] = pd.Series(s, index=D.index)

                # make sure altitude is in km
                # and temp in Kelvin
                D['Alt'] = D['Alt'] * alt_to_km
                D['Temp'] = D['Temp'] + temp_to_K

                # get rid of some unneeded columns
                if not TPbased:
                    D.drop(useless_cols, inplace=True, axis=1)

                # append to list of data frames
                DFlist.append(D)

    # pd.concat raises a ValueError on an empty list, so hand back an empty
    # data frame when nothing was found
    if not DFlist:
        return pd.DataFrame()

    # merge the list of data frames into a single DF
    return pd.concat(DFlist, axis=0)