def load_obs(self, forecast_hour=6, madis=True, variance=False):
    """
    Load the observations valid `forecast_hour` hours after the ensemble
    was initialized (default 6 hours).

    - madis: if true, load MADIS observations; otherwise load gridded
      observations.
    - variance: if true (gridded observations only), load the gridded
      observation file that also contains ensemble variance at the ob
      location.
    - Gridded and MADIS observations must be generated/obtained before
      calling this function.

    Returns a list with one string per line of the observation text file.
    """
    dty, dtm, dtd, dth = ef.dt_str_timedelta(self.date, forecast_hour)
    ob_str = ef.var_string([self.ob_type])
    # date/hour stamp shared by every observation file name
    stamp = dty + dtm + dtd + '_' + dth + '00'

    # directory where the observations are
    if madis:
        obs_file = ('/home/disk/hot/stangen/Documents/surface_obs/MADIS/'
                    + dty + dtm + '/combined_' + self.ob_type + '/'
                    + self.ob_type + '_' + stamp + '.txt')
    else:
        grid_dir = ('/home/disk/hot/stangen/Documents/gridded_obs/'
                    + self.ens_type + '/' + dty + dtm + '/'
                    + self.ob_type + '/')
        if self.new_format:
            # new format: the file name carries the grid description
            # instead of the observation type
            obs_file = (grid_dir + stamp + '_' + str(self.l) + '_'
                        + str(self.r) + '_' + str(self.t) + '_'
                        + str(self.b) + '_' + str(self.s))
        else:
            obs_file = grid_dir + self.ob_type + '_' + stamp
        if variance:
            obs_file += '_variance'
        obs_file += '.txt'

    print('loading ' + ob_str + ' obs from ' + stamp)
    # context manager guarantees the file handle is released, even if
    # reading fails part-way through
    with open(obs_file, 'r') as f1:
        obs = f1.readlines()
    return obs
def retreive_tigge_data(date):
    """
    Request the TIGGE ensemble file for the forecast initialized at `date`.

    Builds the save directory and target file name from `date` and the
    module-level names `ens`, `var_string`, `surface` and `levels`, creates
    the directory when it is missing, then hands the request to
    tigge_pf_sfc_request (surface == True) or tigge_pf_pl_request
    (surface == False).
    """
    year, month, day, hour = (
        date.strftime(fmt) for fmt in ('%Y', '%m', '%d', '%H'))

    save_dir = ('/home/disk/hot/stangen/Documents/tigge_ensembles/'
                + ens + '/' + year + month + '/')
    # Create directories if they don't yet exist
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # the request functions receive the date as a YYYY-MM-DD string
    day_str = year + '-' + month + '-' + day

    if surface == True:
        sfc_target = (save_dir + day_str + '_' + hour + '_' + ens + '_'
                      + var_string + '_sfc.nc')
        tigge_pf_sfc_request(day_str, hour, sfc_target)
    elif surface == False:
        levstr = ef.var_string(levels)
        pl_target = (save_dir + day_str + '_' + hour + '_' + ens + '_'
                     + var_string + '_' + levstr + '_pl.nc')
        tigge_pf_pl_request(day_str, hour, pl_target)
def __init__(self, date, ens_type, prior_vrbls, ob_type, update_var=None,
             post_vrbls=None, grid=None, new_format=False, efh=54):
    """
    Store the settings that identify one ensemble forecast and its
    observations.

    date        -- initialization time; must provide strftime
    ens_type    -- ensemble type string, used in file paths
    prior_vrbls -- list of prior variables, converted to a string with
                   ef.var_string
    ob_type     -- observation type string
    update_var  -- list of variables to update (default: empty list)
    post_vrbls  -- list of posterior variables (default: empty list)
    grid        -- optional sequence whose first five items are stored as
                   self.l, self.r, self.t, self.b, self.s; an empty or
                   short grid simply leaves the remaining attributes unset
    new_format  -- whether the new file naming format is used
    efh         -- end forecast hour; stored as the string '<efh>hrs'
    """
    # None sentinels instead of mutable list defaults, so instances never
    # share (and accidentally mutate) one default list
    update_var = [] if update_var is None else update_var
    post_vrbls = [] if post_vrbls is None else post_vrbls
    grid = [] if grid is None else grid

    self.date = date
    self.y = self.date.strftime('%Y')
    self.m = self.date.strftime('%m')
    self.d = self.date.strftime('%d')
    self.h = self.date.strftime('%H')
    self.ens_type = ens_type
    self.var_string = ef.var_string(prior_vrbls)  # convert list of vrbls to string
    self.post_vrbls = post_vrbls
    self.ob_type = ob_type
    self.update_var = update_var
    # this allows an empty grid to be input: stop quietly at the first
    # missing item, but do not swallow unrelated errors
    try:
        self.l = grid[0]
        self.r = grid[1]
        self.t = grid[2]
        self.b = grid[3]
        self.s = grid[4]
    except (IndexError, KeyError, TypeError):
        pass
    self.new_format = new_format
    self.efh = str(efh) + 'hrs'
def run_efa(ob_type, update_var):
    """
    Assimilate observations into the prior ensemble with EnSRF and write
    the posterior ensemble to a netCDF file.

    ob_type    -- list of observation types; one Load_Data object is built
                  per entry and its observations are loaded
    update_var -- passed through to Load_Data

    Besides its arguments this function reads the module-level names
    loc_type, localize_radius, date, ensemble_type, variables, self_update
    and obs_type.
    """
    #assign the radius to the localization_radius variable in Observation
    #create a string for saving to specific directories
    if loc_type == 'GC':
        loc_rad = localize_radius
        loc_str = str(loc_rad)
    else:
        loc_rad = None
        loc_str = '_stat_sig'
    time_lag = False
    #date strings used to build the output directory and file name
    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')
    observations = []
    for o, o_type in enumerate(ob_type):
        efa = Load_Data(date, ensemble_type, variables, o_type, update_var)
        #only need to load the netCDF once (first time through the obtype loop)
        if o == 0:
            #initialize an instance of the Load_data class (load in data)
            statecls, lats, lons, elevs = efa.load_netcdfs()
        #load in the obs file
        obs = efa.load_obs()
        #loop through each line in the text file (loop through each observation)
        for ob in obs:
            #this gets the observation information from the text file
            ob_dict = mt.get_ob_info(ob)
            #get longitude positive-definite- ie -130 lon is 230 E
            if ob_dict['lon'] < 0:
                ob_dict['lon'] = ob_dict['lon'] + 360
            utctime = datetime.utcfromtimestamp(ob_dict['time'])
            #check elevation of 4 nearest gridpoints to see whether or not to assimilate ob
            TorF = ef.closest_points(ob_dict['lat'], ob_dict['lon'], lats, lons, ob_dict['elev'], elevs)
            #fill the observation class object with information for assimilation
            obser = Observation(value=ob_dict['ob'], time=utctime, lat=ob_dict['lat'], lon=ob_dict['lon'], obtype=o_type, localize_radius=loc_rad, assimilate_this=TorF, error=1)
            observations.append(obser)
    print('loaded ' + str(len(observations)) + ' obs for assimilation')
    # Example of a hand-built observation, kept for reference:
    #    ob1 = Observation(value=10000.25, time=datetime(2013,4,1,6),lat=24.55,lon=278.21,
    #                      obtype = ob_type[0], localize_radius=1000, assimilate_this=True,
    #                      error=1)
    #    observations.append(ob1)
    #
    # Put the state class object and observation objects into EnSRF object
    assimilator = EnSRF(statecls, observations, loc=loc_type)
    # Update the prior with EFA- post_state is an EnsembleState object
    post_state, post_obs = assimilator.update()
    state = post_state
    #build the string of where to save the files
    outdir = '/home/disk/hot/stangen/Documents/posterior_ensembles/'
    #are the observations only updating their corresponding variable, or
    #are they updating all variables?
    if self_update == True:
        outdir += 'ob_update_self/'
    elif self_update == False:
        outdir += 'ob_update_all/'
    #directory for the type of localization used
    outdir += 'loc_' + loc_str + '/'
    #if dealing with time-lagged ensembles, add another directory
    if time_lag == True:
        outdir += 'time_lag/'
    #add directories for the ens type and year & month
    outdir += ensemble_type + '/' + y + m + '/'
    #Create output directories if they don't yet exist
    if (os.path.isdir(outdir)):
        pass
    else:
        os.makedirs(outdir)
    # NOTE(review): the literal 'T2M-' between year and month is unusual for
    # a date-based file name - confirm it is intended.
    outdir_date_ens = outdir + y + 'T2M-' + m + '-' + d + '_' + h + '_' + ensemble_type
    #if we are only updating the variable type that matches the observation type
    if self_update == True:
        for var in state.vars():
            checkfile = outdir_date_ens + '*'
            # If we're writing a single variable and the other variable already exists,
            # append to that file
            existing_file = glob.glob(checkfile)
            #if the other variable already exists
            if existing_file != []:
                #when only working with 2 variables, this should hold true
                last_ob = True
                print('Appending to existing file!')
                #should only be one file in there, convert from list to string
                existing_file = existing_file[0]
                # append to the existing file
                with Dataset(existing_file, 'a') as dset:
                    print('Writing variable {}'.format(var))
                    dset.createVariable(var, np.float32, ( 'time', 'lat', 'lon', 'ens', ))
                    dset.variables[var].units = ut.get_units(var)
                    dset.variables[var][:] = state[var].values
                #if the filename already contains a .nc because we created the
                #file we are appending to separately from this run, delete
                #the .nc
                # NOTE(review): the rename below uses this stripped name as its
                # source, so it only finds the file when the existing file had
                # no '.nc' suffix - confirm.
                existing_file = existing_file.replace('.nc', '')
                # Rename the checkfile so the filename no longer specifies a
                # single variable type
                newfile = existing_file + '_' + ef.var_string(ob_type)
                if last_ob == True:
                    newfile += '.nc'
                    print('Done!')
                # the result of the shell 'mv' is not checked
                os.system('mv {} {}'.format(existing_file, newfile))
                # ALL DONE!!
            else:
                # If the checkfile does not exist, make a new file
                outfile = outdir_date_ens + '_' + ef.var_string(ob_type)
                #if we are assimilating only one type of observation
                # NOTE(review): obs_type (not the ob_type parameter) is not
                # defined in this function; presumably a module-level list of
                # all observation types - verify.
                if len(obs_type) == 1:
                    outfile = outfile + '.nc'
                ef.make_netcdf(state, outfile)
    #if we are updating all variable types with the observation (regardless of its type)
    elif self_update == False:
        outfile = outdir_date_ens + '_' + ef.var_string(ob_type) + '.nc'
        ef.make_netcdf(state, outfile)
# figure size values (used outside this excerpt)
figsize1 = 18
figsize2 = 12
#-----------------------------------------------------------------------------
#end forecast hour string, part of the prior file name
efh = '54hrs'
#convert to grid indices, add 1 to the right endpoints so python includes the last index
# (the factor of 2 presumably reflects a 0.5-degree grid - TODO confirm)
l = int(abs(-180 - w) * 2)
r = int(abs(-180 - e) * 2) + 1
t = int((90 - n) * 2)
b = int((90 - s) * 2) + 1
#convert lists of variables into strings for loading files
prior_var_str = ef.var_string(prior_var)
#get the times for the forecast
fy = forecast_time.strftime('%Y')
fm = forecast_time.strftime('%m')
fd = forecast_time.strftime('%d')
fh = forecast_time.strftime('%H')
# Filepath of the prior forecast
prior_path = '/home/disk/hot/stangen/Documents/prior_ensembles/' + ens + '/' + fy + fm + '/' + fy + '-' + fm + '-' + fd + '_' + fh + '_' + efh + '_' + prior_var_str + '.nc'
# Load the prior data
print('loading prior netCDF: ' + fy + fm + fd + fh)
with Dataset(prior_path, 'r') as ncdata:
    #print(ncdata.variables)
    # netCDF time variable of the prior file
    times = ncdata.variables['time']
# directory where the plots are saved
savedir = '/home/disk/hot/stangen/Documents/EFA/AR/plots/'
#create strings for loading txt file containing the stats
sy = start_date.strftime('%Y')
sm = start_date.strftime('%m')
sd = start_date.strftime('%d')
sh = start_date.strftime('%H')
ey = end_date.strftime('%Y')
em = end_date.strftime('%m')
ed = end_date.strftime('%d')
eh = end_date.strftime('%H')
# start-end date string, YYYYmmddHH-YYYYmmddHH
datestr=sy+sm+sd+sh+'-'+ey+em+ed+eh
varstr = ef.var_string(assim_obs)
gridstr = ef.var_string(grid)
# the AR-specific statistics live in a separately named file
if AR_specific == True:
    filepath = filedir+datestr+'_'+varstr+'_'+gridstr+'_gridobs_ARspecific.txt'
else:
    filepath = filedir+datestr+'_'+varstr+'_'+gridstr+'_gridobs.txt'
# read every line, then release the file handle; f1 stays bound (closed)
# in case later code still refers to it
with open(filepath, 'r') as f1:
    stats = f1.readlines()
stats_dict = {}
#-----------Changed it so that var is now ensemble type, ens is now observation variable type------
def run_efa(ob_type, update_var, ob_err_var):
    """
    Assimilate observations into the prior ensemble with EnSRF and write
    the posterior ensemble to a netCDF file.

    All of the inputs are lists.

    ob_type    -- observation types to assimilate
    update_var -- variables passed to Load_Data for updating
    ob_err_var -- one entry per observation type: either a number (as a
                  string) used as the observation error variance, or
                  'ensvar' optionally followed by a multiplication factor,
                  meaning the ensemble variance stored with each gridded
                  observation is used (times that factor)

    Besides its arguments this function reads the module-level names
    localize_radius, loc_type, date, ensemble_type, variables, grid,
    new_format, efh, ob_category, inflation, inflation_str, self_update
    and use_oberrvar.
    """
    #print values used
    print('localization radius: ' + str(localize_radius))
    print('observation error variance: ' + str(ob_err_var))

    #assign the radius to the localization_radius variable in Observation
    #create a string for saving to specific directories
    if loc_type == 'GC':
        localize_type = 'GC'
        loc_rad = localize_radius
        loc_str = str(loc_rad)
    elif loc_type == 'hybrid':
        localize_type = 'GC'
        #set the localization string to be the special loc radius, plus hybrid (2000hybrid)
        #(loc_rad is chosen per observation in the loop below)
        loc_str = str(localize_radius) + loc_type
    elif loc_type.startswith('statsig'):
        localize_type = loc_type
        loc_rad = localize_radius
        loc_str = str(localize_radius) + loc_type
    #points within a radius of ob will be fully updated by covariance (not localized)
    elif loc_type == 'cutoff':
        localize_type = loc_type
        loc_rad = localize_radius
        loc_str = str(localize_radius) + loc_type
    else:
        # Without this assignment localize_type stayed unbound on this path
        # and the EnSRF call below raised UnboundLocalError. Passing
        # loc_type through matches the two-argument run_efa.
        localize_type = loc_type
        loc_rad = None
        loc_str = '_stat_sig'

    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')

    observations = []

    if loc_type == 'hybrid':
        #need to access 12-hour forecast IVT to determine presence of AR
        efaIVT = Load_Data(date, ensemble_type, variables, 'IVT', ['IVT'],
                           grid=grid, new_format=new_format, efh=efh)
        #initialize an instance of the Load_data class (load in data)
        IVT_statecls, lats, lons, elevs = efaIVT.load_netcdfs()
        #get IVT 12 hours into the forecast, at time of observations
        #(time_ind = 2 is forecast hour 12)
        IVT = IVT_statecls.variables['IVT'].values[2, :, :, :]
        #obtain the mean of the ensemble (nlats x nlons)
        ens_mean = IVT.mean(axis=-1)
        #obtain variance of the ensemble (nlats x nlons)
        variance = np.var(IVT, axis=-1, ddof=1)

    #loop through each observation type
    for o, o_type in enumerate(ob_type):
        #if we are wanting to use ensemble variance as ob error variance
        if ob_err_var[o].startswith('ensvar'):
            use_ens_var = True
        else:
            use_ens_var = False

        efa = Load_Data(date, ensemble_type, variables, o_type, update_var,
                        grid=grid, new_format=new_format, efh=efh)
        #only need to load the netCDF once (first time through the obtype loop)
        if o == 0:
            #initialize an instance of the Load_data class (load in data)
            statecls, lats, lons, elevs = efa.load_netcdfs()

        #if we are assimilating MADIS observations
        if ob_category == 'madis':
            #load in the obs file
            obs = efa.load_obs()
        #if we are assimilating next-cycle 0-hour forecast gridded "observations"
        elif ob_category == 'gridded':
            #load in the obs file
            obs = efa.load_obs(forecast_hour=12, madis=False, variance=use_ens_var)

        #loop through each line in the text file (loop through each observation)
        for ob in obs:
            #this gets the observation information from the text file
            ob_dict = mt.get_ob_info(ob, use_ens_var)
            #get longitude positive-definite- ie -130 lon is 230 E
            if ob_dict['lon'] < 0:
                ob_dict['lon'] = ob_dict['lon'] + 360
            #get ob time in datetime format
            utctime = datetime.utcfromtimestamp(ob_dict['time'])

            if ob_category == 'madis':
                #check elevation of 4 nearest gridpoints to see whether or not to assimilate ob-
                #returns true or false
                TorF = ef.closest_points(ob_dict['lat'], ob_dict['lon'], lats, lons,
                                         ob_dict['elev'], elevs)
            elif ob_category == 'gridded':
                #no need to check elevation, since using gridded obs
                TorF = True

            #if we are using ens variance as ob error variance
            if use_ens_var == True:
                #multiply by factor specified at end of 'ensvar'
                mult_factor = ob_err_var[o].replace('ensvar', '')
                if mult_factor == '':
                    mult_factor = 1
                else:
                    mult_factor = float(mult_factor)
                # NOTE(review): SOURCE's indentation was lost; this print is
                # assumed to run for either factor branch - confirm.
                print('multiplication factor: ', mult_factor)
                ob_var = ob_dict['variance'] * mult_factor
            elif use_ens_var == False:
                ob_var = float(ob_err_var[o])

            #if we are using a hybrid localization radius- longer localization
            #radius within the AR, 1000 km outside of it
            #if the lat/lon lie within some AR box, check if the ob point is in an AR-
            #if IVT > 250
            if loc_type == 'hybrid':
                if ob_dict['lon'] >= 140 and ob_dict['lon'] <= 245 and ob_dict[
                        'lat'] >= 33 and ob_dict['lat'] <= 51:
                    #get the ensemble value at the lat/lon pair (ob_variance isn't used)
                    ob_value, ob_variance = ef.closest_points(
                        ob_dict['lat'], ob_dict['lon'], lats, lons,
                        variable=ens_mean, need_interp=True, gen_obs=True,
                        variance=variance)
                    #if ob value is AR and in the grid area, set its localization
                    #radius to the input (hybrid) loc_rad
                    if ob_value >= 250:
                        loc_rad = localize_radius
                    else:
                        loc_rad = 1000
                else:
                    #set the default loc_rad to be 1000
                    loc_rad = 1000

            #fill the observation class object with information for assimilation
            obser = Observation(value=ob_dict['ob'], time=utctime,
                                lat=ob_dict['lat'], lon=ob_dict['lon'],
                                obtype=o_type, localize_radius=loc_rad,
                                assimilate_this=TorF, error=ob_var)
            observations.append(obser)

    print('loaded ' + str(len(observations)) + ' obs for assimilation')

    # Put the state class object and observation objects into EnSRF object
    assimilator = EnSRF(statecls, observations, inflation=inflation, loc=localize_type)
    # Update the prior with EFA- state is an EnsembleState object and is the
    # posterior, post_obs isn't used
    state, post_obs = assimilator.update()

    #---build the string of which directory to save the file--------------------------------
    outdir = '/home/disk/hot/stangen/Documents/posterior_ensembles/'
    #what kind of observations are we assimilating?
    outdir += ob_category + '/'
    #are the observations only updating their corresponding variable, or
    #are they updating all variables?
    if self_update == True:
        outdir += 'ob_update_self/'
    elif self_update == False:
        outdir += 'ob_update_all/'
    #directory for inflation used
    outdir += 'inf_' + inflation_str + '/'
    #directory for the type of localization used
    outdir += 'loc_' + loc_str + '/'
    #add directories for the ens type and year & month
    outdir += ensemble_type + '/' + y + m + '/'
    #----------------------------------------------------------------------------------------

    #Create output directories if they don't yet exist
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    #add initialization date to filename
    outdir_date_ens = outdir + y + '-' + m + '-' + d + '_' + h
    #new format string: add nhrs and if gridded, grid dimensions to filename
    if new_format == True:
        outdir_date_ens += '_' + str(efh) + 'hrs'
        # NOTE(review): SOURCE's indentation was lost; the grid suffix is
        # assumed to apply only under the new format - confirm.
        if ob_category == 'gridded':
            outdir_date_ens += '_' + ef.var_string(grid)

    #convert 1-length ob err var list to a string with no decimals
    #this makes it so that multiple ob err vars that acted on one data type
    #can all be saved to one netCDF- makes managing files easier where I was testing
    #what observation error variance to use.
    #more specifically, this adds ob err var to the variable name in the netCDF.
    #ob_err_var_str is only called if self_update == True, so the ob err var
    #is never in the variable name within the netCDF if self_update == False.
    ob_err_var_str = ''
    if use_oberrvar == True:
        ob_err_var_str = str(ob_err_var[0]).replace('.', '-')
    #if we don't want the observation error variance used in the name of the file
    #and/or the names of the variables in the file
    elif use_oberrvar == False:
        ob_err_var = ''

    #if we are only updating the variable type that matches the observation type
    if self_update == True:
        for var in state.vars():
            checkfile = outdir_date_ens + '*'
            # If we're self-updating variables and we are on the 2nd or later variable in
            # the loop, this will return with something
            existing_file = glob.glob(checkfile)
            #if other variables already exists, append to the netCDF
            if existing_file != []:
                print('Appending to existing file!')
                #should only be one file in there, convert from list to string
                existing_file = existing_file[0]
                # append to the existing file
                with Dataset(existing_file, 'a') as dset:
                    print('Writing variable {}'.format(var))
                    dset.createVariable(var + ob_err_var_str, np.float32,
                                        ('time', 'lat', 'lon', 'ens',))
                    dset.variables[var + ob_err_var_str].units = ef.get_units(var)
                    dset.variables[var + ob_err_var_str][:] = state[var].values
                #if the filename already contains a .nc because we created the
                #file we are appending to separately from this run, delete
                #the .nc
                existing_file = existing_file.replace('.nc', '')
                # Rename the checkfile so the filename no longer specifies a
                # single variable type- add new variable to filename
                newfile = existing_file + '_' + ef.var_num_string(
                    ob_type, ob_err_var) + '.nc'
                os.system('mv {} {}'.format(existing_file + '.nc', newfile))
            else:
                # If the checkfile does not exist, make a new file
                outfile = outdir_date_ens + '_' + ef.var_num_string(
                    ob_type, ob_err_var) + '.nc'
                ef.make_netcdf(state, outfile, ob_err_var_str)
    #if we are updating all variable types with the observation (regardless of its type)
    #and use_oberrvar == True, observation error variance is not saved in the
    #variable name in the netCDF, but it is saved in the filename.
    elif self_update == False:
        outfile = outdir_date_ens + '_' + ef.var_num_string(
            ob_type, ob_err_var) + '.nc'
        ef.make_netcdf(state, outfile)
def create_new_netcdf(date, ens_type, in_vrbls, vrbls):
    """
    This function creates a netCDF from a raw TIGGE netCDF. The main purpose
    of this is to change surface pressure to altimeter setting, calculate
    6-hourly precipitation, and to rename/shorten variable names.
    Unfortunately using the float32 format for the variables uses twice the
    memory of the raw TIGGE int16 format.

    date     -- initialization time of the forecast; must provide strftime
    ens_type -- ensemble type string, used in the input/output paths
    in_vrbls -- variable names as they appear in the raw TIGGE file name
    vrbls    -- variable names to write to the output file

    The level suffix of the input file name comes from the module-level
    name `lev`. Nothing is returned; the result is written to disk.
    """
    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')
    print('Working on ' + d + '_' + h + ' ' + ens_type)

    #rename TIGGE variable names
    vardict = {
        'T2M': 't2m',
        'ALT': 'sp',
        'P6HR': 'tp',
        'TCW': 'tcw',
        'elev': 'orog',
        'lat': 'latitude',
        'lon': 'longitude',
        'time': 'time',
        'mem': 'number',
    }

    #build a var string corresponding with naming convention of tigge files
    invar_string = ef.var_string(in_vrbls)
    #build a var string corresponding with naming convention of output files
    outvar_string = ef.var_string(vrbls)

    # This is the input file: the raw TIGGE netcdf
    indir = '/home/disk/hot/stangen/Documents/tigge_ensembles/' + ens_type + '/' + y + m + '/' + y + '-' + m + '-' + d + '_' + h + '_' + ens_type + '_' + invar_string + '_' + lev + '.nc'
    # This is the orography file
    orography = '/home/disk/hot/stangen/Documents/tigge_ensembles/orography/2013-04-01_00_' + ens_type + '.nc'
    # This is the output directory for the netcdf with altimeter setting
    outdir = '/home/disk/hot/stangen/Documents/prior_ensembles/' + ens_type + '/' + y + m + '/'

    #Create output directories if they don't yet exist
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    #Read the ensemble netcdf file; the context manager closes it once
    #everything needed has been copied into memory
    with Dataset(indir, 'r') as ncdata:
        # Shape of ncdata is nvars, ntimes, nmems, nlats, nlons
        tunit = ncdata.variables[vardict['time']].units
        ftimes = num2date(ncdata.variables[vardict['time']][:], tunit)
        nmems = len(ncdata.dimensions[vardict['mem']])
        ntimes = len(ftimes)
        nvars = len(vrbls)
        nlats = len(ncdata.dimensions[vardict['lat']])
        nlons = len(ncdata.dimensions[vardict['lon']])

        #time range of ensemble, for naming of file
        ftime_diff = ftimes[-1] - ftimes[0]
        tr = int((ftime_diff.days) * 24 + (ftime_diff.seconds) / 3600)
        tr_str = str(tr) + 'hrs'

        # Allocate the state array
        print('Allocating the state vector array...')
        state = np.zeros((nvars, ntimes, nlats, nlons, nmems))

        # For the metadata, need a list of locations
        lats = ncdata.variables[vardict['lat']][:][:, None]
        lons = ncdata.variables[vardict['lon']][:][None, :]
        #And an array of ensemble members
        memarr = np.arange(1, nmems + 1)

        # Now to populate the state array
        for va, var in enumerate(vrbls):
            #reason index 0:2 is checked is that QF and D-QF can take place at different
            #levels (QF850, D-QF850), so to avoid specifying each possible variable name,
            #just check 1st 2 chars.
            if var[0:2] not in ['QF', 'D-']:
                field = ncdata.variables[vardict[var]][:, :, :, :]
            # NOTE(review): SOURCE's indentation was lost; this message is
            # assumed to be printed for every variable - confirm.
            print('Adding variable {}'.format(var))

            #convert surface pressure to altimeter setting
            if var == 'ALT':
                #Read the orography netcdf file- for calculating altimeter setting
                #has a time index, even when only one time is gotten from TIGGE- requires
                #indexing like [0,:,:] to get this first (and only) time.
                with Dataset(orography, 'r') as orogdata:
                    elev = orogdata.variables[vardict['elev']][0, :, :]
                #Convert surface pressure to altimeter setting in mb
                #pressure in netcdf file is in pascals
                presinmb = field / 100
                field = presinmb / ((288 - 0.0065 * elev) / 288)**5.2561

            #find 6-hourly precipitation
            if var == 'P6HR':
                #create dummy field to facilitate subtracting of total precipitation
                #t-1 from t without saving over t, so the next subtraction still works
                field2 = np.zeros((ntimes, nmems, nlats, nlons))
                for t in range(0, ntimes):
                    if t == 0:
                        field2[t, :, :, :] = field[t, :, :, :]
                    #subtract previous time's precipitation to get 6 hour precipitation
                    else:
                        field2[t, :, :, :] = field[t, :, :, :] - field[t - 1, :, :, :]
                #reassign 6 hour precip to field
                field = field2

            #magnitude of moisture flux
            if var[0:2] == 'QF':
                q = ncdata.variables['q'][:, :, :, :]
                u = ncdata.variables['u'][:, :, :, :]
                v = ncdata.variables['v'][:, :, :, :]
                #moisture flux is qV, to find magnitude find distance from origin
                #to point, multiply by q, multiply by 1000 to get in units of g/kg.
                field = q * np.sqrt(u**2 + v**2) * 1000

            #direction of moisture flux (-180 to 180, unit circle degrees)
            if var[0:2] == 'D-':
                u = ncdata.variables['u'][:, :, :, :]
                v = ncdata.variables['v'][:, :, :, :]
                field = np.arctan2(v, u) * 180 / np.pi

            # make the ensemble dimension at the end of state:
            # (time, mem, lat, lon) -> (time, lat, lon, mem)
            field = np.swapaxes(field, 1, 3)
            field = np.swapaxes(field, 1, 2)
            # Populate its component of the state array
            state[va, :, :, :, :] = field

    print('Writing to netcdf...')
    # Convert times back to integers
    valid_times = date2num(ftimes, tunit)

    # Write ensemble forecast to netcdf; the context manager completes
    # (closes) the file even if writing fails part-way through
    outfile = outdir + y + '-' + m + '-' + d + '_' + h + '_' + tr_str + '_' + outvar_string + '44.nc'
    with Dataset(outfile, 'w') as dset:
        dset.createDimension('time', None)
        dset.createDimension('lat', nlats)
        dset.createDimension('lon', nlons)
        dset.createDimension('ens', nmems)
        dset.createVariable('time', 'i4', ('time', ))
        dset.createVariable('lat', np.float32, ('lat', ))
        # one-element tuple, like the other coordinate variables
        dset.createVariable('lon', np.float32, ('lon', ))
        dset.createVariable('ens', 'i4', ('ens', ))
        dset.variables['time'].units = tunit
        dset.variables['lat'].units = 'degrees_north'
        dset.variables['lon'].units = 'degrees_east'
        dset.variables['ens'].units = 'member_number'
        dset.variables['time'][:] = np.array(valid_times)
        dset.variables['lat'][:] = lats
        dset.variables['lon'][:] = lons
        dset.variables['ens'][:] = memarr
        for var_index, var in enumerate(vrbls):
            print('Writing variable {}'.format(var))
            dset.createVariable(var, np.float32, ('time', 'lat', 'lon', 'ens', ))
            dset.variables[var].units = ef.get_units(var)
            dset.variables[var][:] = state[var_index, :, :, :, :]
elif use_oberrvar == 'false':
    use_oberrvar = False
#are the observations only updating their corresponding variable, or
#are they updating all variables? -ie t2m only updates t2m, alt only updates alt
# the flag arrives as the string 'true' or 'false' on the command line and is
# converted to a boolean; any other string is left unchanged
self_update = sys.argv[20]
if self_update == 'true':
    self_update = True #true if you want the above updates, otherwise false
elif self_update == 'false':
    self_update = False
# start-end date string
datestr = startstr + '-' + endstr
save_dir = '/home/disk/hot/stangen/Documents/EFA/duplicate_madaus/mse_var_output/'
#variable string (for saving the .txt file)
varstr = ef.var_string(allobs)
#make a list of netCDF variable names/strings from obs+obs error var
#post_vrbls contains the variable names in the filename in the posterior
netcdf_varnames = []
#make a list of variable names we want to save to the .txt file (this may include ob err variance,
#even when not doing self-update)
dict_varnames = []
for i, ob in enumerate(ob_types):
    #If we are loading prior to do stats on, we need to set ob error variance to
    #[''] to make variable in netCDF load correctly.
    #also, if we did not previously use the obs err var in creating the posterior
    #ens name/variable names, or did not only self-update each variable,
    #set ob_err_var to [''], since ob err var was not included in names.
# directory where the plots are saved
savedir = '/home/disk/hot/stangen/Documents/EFA/duplicate_madaus/plots/'
#create strings for loading txt file containing the stats
sy = start_date.strftime('%Y')
sm = start_date.strftime('%m')
sd = start_date.strftime('%d')
sh = start_date.strftime('%H')
ey = end_date.strftime('%Y')
em = end_date.strftime('%m')
ed = end_date.strftime('%d')
eh = end_date.strftime('%H')
# start-end date string, YYYYmmddHH-YYYYmmddHH
datestr=sy+sm+sd+sh+'-'+ey+em+ed+eh
varstr = ef.var_string(variables)
filepath = filedir+datestr+'_'+varstr
# gridded-observation statistics carry a '_gridobs' suffix
if ob_category == 'gridded':
    filepath += '_gridobs.txt'
elif ob_category == 'madis':
    filepath += '.txt'
# read every line, then release the file handle; f1 stays bound (closed)
# in case later code still refers to it
with open(filepath, 'r') as f1:
    stats = f1.readlines()
stats_dict = {}
for line in stats:
    # each line of the stats file is comma-separated
    line_split = line.split(',')
# day and hour strings of the forecast initialization time
fd = forecast_time.strftime('%d')
fh = forecast_time.strftime('%H')
#convert to grid indices, add 1 to the right endpoints so python includes the last index
# (the factor of 2 presumably reflects a 0.5-degree grid - TODO confirm)
l = int((w + 180) * 2)
r = int((e + 180) * 2) + 1
t = int((90 - n) * 2)
b = int((90 - s) * 2) + 1
#deal with crossing over 180W: shift the left index by 720 so that l < r
if l > r:
    l = l - 720
#convert lists of variables into strings for loading files
prior_var_str = ef.var_string(prior_var)
grid_str = ef.var_string(grid)
#get forecast index- i.e how many timesteps after initialization we are looking at
timediff = analysis_time - forecast_time
tdd = timediff.days
tds = timediff.seconds
# lead time in hours
tdh = tdd * 24 + tds / 3600
#timesteps of forecast output are 6 hours apart
timeind = int(tdh / 6)
#dictionary for ensemble types, for use in plotting
ens_dict = {'ncep': 'GEFS', 'eccc': 'CMC', 'ecmwf': 'ECMWF'}
# Filepath of the analysis at the desired time
analysis_path = '/home/disk/hot/stangen/Documents/prior_ensembles/' + ens + '/' + ay + am + '/' + ay + '-' + am + '-' + ad + '_' + ah + '_' + efh + '_' + prior_var_str + '.nc'
# command-line arguments 11-15: end index, prior/posterior flag,
# localization radius, inflation and observation category
end_index=int(sys.argv[11])
boolstr=sys.argv[12]
#change string to boolean for loading prior or posterior ensembles
# (post is only assigned when the argument is exactly 'true' or 'false')
if boolstr == 'true':
    post=True
elif boolstr =='false':
    post=False
# kept as strings, exactly as given on the command line
loc_rad = sys.argv[13]
inflation = sys.argv[14]
ob_category = sys.argv[15]
# start-end date string
datestr = startstr+'-'+endstr
save_dir = '/home/disk/hot/stangen/Documents/EFA/duplicate_madaus/mse_var_output/'
#variable string
varstr = ef.var_string(allobs)
#make a list of netCDF variable names/strings in part of file name from obs/obs error var
obtype_errvar = []
for i, ob in enumerate(ob_types):
    obtype_errvar.append(ef.var_num_string([ob],[ob_err_var[i]]))
#prior/post string
if post==True:
    prior_or_post='loc'+loc_rad
if post==False:
    prior_or_post='prior'
#last forecast hour I want to get observations for
# (6 hours per index step)
end_hour = 6*end_index
#the dict where all the data is
ob_dict = {}
#dict for the means from the data
#start and end date to get ensembles. start_date = datetime(2015,11,10,0) #YYYY,m,d,h end_date = datetime(2015,11,17,12) hourstep = 12 #how often you want a new forecast initialization, usually 12 hr #variables with names coming from the raw TIGGE- see get_tigge_data if unsure of names. #the order matters to make filename match exactly. surf_variables = ['D2M','SP','U10','V10'] #surface variables upper_variables = ['Q','U','V'] #upper variables levels = ['1000','925','850','700','500','300'] # levels in the upper level netCDF end_variables = ['IWV','IVT','D-IVT'] #variables we want to produce from this script. #------------------------------------------------------------------------------ #a list of dates to loop through to load each forecast initialized on these dates dates = mt.make_datetimelist(start_date,end_date,hourstep) #strings for loading the surface and aloft TIGGE netCDFs surf_str = ef.var_string(surf_variables)+'_sfc.nc' upper_str = ef.var_string(upper_variables)+'_'+ef.var_string(levels)+'_pl.nc' #number of variables in the netCDF produced from this script nvars = len(end_variables) g = 9.80665 outvar_string = ef.var_string(end_variables) for ens in ensemble_type: for date in dates: y,m,d,h = ef.dt_str_timedelta(date) print('Working on '+d+'_'+h+' '+ens)
def load_netcdfs(self, post=False, ob_cat='madis', ob_upd='ob_update_self', inf='none', lr='1000'):
    """
    Loads the ensemble netCDF and the elevation netCDF. Packages and returns
    ensemble data into an EnsembleState (xarray) object for use in efa_xray
    code. Also returns latitudes, longitudes, and elevations of the ensemble
    type. If posterior ensemble, there are more options for exactly what EFA
    took place for finding the right file.

    post = boolean, if true, we are loading the posterior, if false, loading
    the prior.

    posterior options:
    ob_cat = observation category, either 'madis' or 'gridded' observations
    ob_upd = observation update, is either 'ob_update_self' or
    'ob_update_all'- did we use the observations to update just their
    corresponding variables, or did we allow them to update all variable
    types in the ensemble?
    inf = inflation
    lr = localization radius we used

    Returns (statecls, lats, lons, elevs).
    """
    # initialization date part shared by every file name
    date_stamp = self.y + '-' + self.m + '-' + self.d + '_' + self.h

    # file holding the ensemble of all times
    if post:
        post_varstring = ef.var_string(self.post_vrbls)
        post_dir = ('/home/disk/hot/stangen/Documents/posterior_ensembles/'
                    + ob_cat + '/' + ob_upd
                    + ('/inf_' + inf).replace('.', '-')
                    + '/loc_' + str(lr) + '/' + self.ens_type + '/'
                    + self.y + self.m + '/')
        if self.new_format:
            infile = (post_dir + date_stamp + '_' + self.efh + '_'
                      + str(self.l) + '_' + str(self.r) + '_' + str(self.t)
                      + '_' + str(self.b) + '_' + str(self.s) + '_'
                      + post_varstring + '.nc')
        else:
            infile = (post_dir + date_stamp + '_' + self.ens_type + '_'
                      + post_varstring + '.nc')
        prior_or_post = 'posterior'
    else:
        prior_dir = ('/home/disk/hot/stangen/Documents/prior_ensembles/'
                     + self.ens_type + '/' + self.y + self.m + '/')
        if self.new_format:
            infile = (prior_dir + date_stamp + '_' + self.efh + '_'
                      + self.var_string + '.nc')
        else:
            infile = (prior_dir + date_stamp + '_' + self.ens_type + '_'
                      + self.var_string + '.nc')
        prior_or_post = 'prior'

    print('loading netcdf file: ' + prior_or_post + ': ' + self.ens_type + ' '
          + self.y + self.m + self.d + '_' + self.h + '00')

    # loading/accessing the netcdf data; slicing with [:] copies the values
    # into memory, so the file can be closed as soon as the block ends
    with Dataset(infile, 'r') as ncdata:
        times = ncdata.variables['time']
        ftimes = num2date(times[:], times.units)
        lats = ncdata.variables['lat'][:]
        lons = ncdata.variables['lon'][:]
        mems = ncdata.variables['ens'][:]
        # storing the variable data in a dict
        allvars = {}
        for var in self.update_var:
            allvars[var] = (['validtime', 'y', 'x', 'mem'],
                            ncdata.variables[var][:])

    # the orography file provides the elevation of ecmwf, eccc, ncep
    orography = '/home/disk/hot/stangen/Documents/tigge_ensembles/orography/2013-04-01_00_' + self.ens_type + '.nc'
    with Dataset(orography, "r") as orog_data:
        elevs = orog_data.variables['orog'][0, :]

    lonarr, latarr = np.meshgrid(lons, lats)
    pack_str = ef.var_string(self.update_var)
    print('packaging ' + pack_str + ' into EnsembleState object')
    # Package into an EnsembleState object knowing the state and metadata
    statecls = EnsembleState.from_vardict(allvars,
                                          {'validtime' : ftimes,
                                           'lat' : (['y', 'x'], latarr),
                                           'lon' : (['y', 'x'], lonarr),
                                           'mem' : mems,
                                           })
    return statecls, lats, lons, elevs