Example #1
def run_efa(ob_type, update_var):
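    #note: loc_type, localize_radius, date, ensemble_type, variables and
    #self_update are module-level settings defined outside this function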

    #assign the radius to the localization_radius variable in Observation
    #create a string for saving to specific directories
    if loc_type == 'GC':
        loc_rad = localize_radius
        loc_str = str(loc_rad)
    else:
        loc_rad = None
        loc_str = '_stat_sig'

    time_lag = False

    #date strings used to build directory and file names

    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')

    observations = []
    for o, o_type in enumerate(ob_type):
        efa = Load_Data(date, ensemble_type, variables, o_type, update_var)
        #only need to load the netCDF once (first time through the obtype loop)
        if o == 0:
            #load the prior ensemble state from the netCDFs
            statecls, lats, lons, elevs = efa.load_netcdfs()

        #load in the obs file
        obs = efa.load_obs()

        #loop through each line in the text file (loop through each observation)
        for ob in obs:
            #this gets the observation information from the text file
            ob_dict = mt.get_ob_info(ob)
            #make longitude positive-definite, i.e. -130 lon becomes 230 E
            if ob_dict['lon'] < 0:
                ob_dict['lon'] = ob_dict['lon'] + 360
            utctime = datetime.utcfromtimestamp(ob_dict['time'])
            #check elevation of 4 nearest gridpoints to see whether or not to assimilate ob
            TorF = ef.closest_points(ob_dict['lat'], ob_dict['lon'], lats,
                                     lons, ob_dict['elev'], elevs)
            #fill the observation class object with information for assimilation
            obser = Observation(value=ob_dict['ob'],
                                time=utctime,
                                lat=ob_dict['lat'],
                                lon=ob_dict['lon'],
                                obtype=o_type,
                                localize_radius=loc_rad,
                                assimilate_this=TorF,
                                error=1)
            observations.append(obser)
    print('loaded ' + str(len(observations)) + ' obs for assimilation')

    # Put the state class object and observation objects into EnSRF object
    assimilator = EnSRF(statecls, observations, loc=loc_type)

    # Update the prior with EFA- post_state is an EnsembleState object
    post_state, post_obs = assimilator.update()

    state = post_state

    #build the string of where to save the files
    outdir = '/home/disk/hot/stangen/Documents/posterior_ensembles/'
    #are the observations only updating their corresponding variable, or
    #are they updating all variables?
    if self_update:
        outdir += 'ob_update_self/'
    else:
        outdir += 'ob_update_all/'
    #directory for the type of localization used
    outdir += 'loc_' + loc_str + '/'
    #if dealing with time-lagged ensembles, add another directory
    if time_lag:
        outdir += 'time_lag/'
    #add directories for the ens type and year & month
    outdir += ensemble_type + '/' + y + m + '/'

    #Create output directories if they don't yet exist
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    outdir_date_ens = outdir + y + '-' + m + '-' + d + '_' + h + '_' + ensemble_type

    #if we are only updating the variable type that matches the observation type
    if self_update:
        for var in state.vars():
            checkfile = outdir_date_ens + '*'
            # If we're writing a single variable and the other variable already exists,
            # append to that file
            existing_file = glob.glob(checkfile)
            #if the other variable already exists
            if existing_file:
                #when only working with 2 variables, this should hold true
                last_ob = True
                print('Appending to existing file!')
                #should only be one file in there, convert from list to string
                existing_file = existing_file[0]
                # append to the existing file
                with Dataset(existing_file, 'a') as dset:
                    print('Writing variable {}'.format(var))
                    dset.createVariable(var, np.float32, (
                        'time',
                        'lat',
                        'lon',
                        'ens',
                    ))
                    dset.variables[var].units = ut.get_units(var)
                    dset.variables[var][:] = state[var].values
                #remember the on-disk name for the rename below; strip the
                #.nc if the file we are appending to already had one from a
                #separate run
                src_file = existing_file
                existing_file = existing_file.replace('.nc', '')
                # Rename the checkfile so the filename no longer specifies a
                # single variable type
                newfile = existing_file + '_' + ef.var_string(ob_type)
                if last_ob:
                    newfile += '.nc'
                    print('Done!')

                os.system('mv {} {}'.format(src_file, newfile))
                # ALL DONE!!
            else:

                # If the checkfile does not exist, make a new file
                outfile = outdir_date_ens + '_' + ef.var_string(ob_type)
                #if we are assimilating only one type of observation
                if len(ob_type) == 1:
                    outfile = outfile + '.nc'
                ef.make_netcdf(state, outfile)

    #if we are updating all variable types with the observation (regardless of its type)
    else:
        outfile = outdir_date_ens + '_' + ef.var_string(ob_type) + '.nc'
        ef.make_netcdf(state, outfile)
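
A minimal driver sketch for this example, assuming the module-level settings run_efa reads (loc_type, localize_radius, date, ensemble_type, variables, self_update) are defined by the calling script; every value below is an illustrative assumption, not part of the original source:

from datetime import datetime

#hypothetical module-level configuration read by run_efa
loc_type = 'GC'                  #Gaspari-Cohn localization
localize_radius = 1000           #localization radius
date = datetime(2013, 4, 1, 0)   #forecast initialization time
ensemble_type = 'ecmwf'
variables = ['T2M', 'ALT']
self_update = True               #obs update only their own variable

#update 2-m temperature using 2-m temperature observations
run_efa(ob_type=['T2M'], update_var=['T2M'])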
Example #2
#fragment: ncdata is an open netCDF4 Dataset holding the prior ensemble, and
#vrbl names the variable to verify; both are defined earlier in the original script
lats = ncdata.variables['lat'][:]
lons = ncdata.variables['lon'][:]
nens = len(ncdata.variables['ens'][:])
prior = ncdata.variables[vrbl][:]
prior_units = ncdata.variables[vrbl].units

#dummy elevations so closest_points function works
elevs = np.zeros([len(lats), len(lons)])
#get observation time (12 hours after forecast)
ob_time = forecast_time + timedelta(seconds=3600 * 12)
#get ensemble estimate of observation (interp)
interp, TorF = ef.closest_points(ob_lat,
                                 ob_lon,
                                 lats,
                                 lons,
                                 0,
                                 elevs,
                                 ob_time,
                                 ftimes,
                                 prior,
                                 need_interp=True)

r_crit = ef.get_r_crit()
#get ranks/rank perts of ob estimate and state
ye_rank = np.argsort(interp)
ye_rank = np.argsort(ye_rank)

#time indices into the forecast (index 2 = forecast hour 12, so at 6-hourly
#output these are hours 12, 24, 36, and 48)
time_ind = [2, 4, 6, 8]

for i in time_ind:
    prior_mean = np.mean(prior[i, :], axis=-1)
    for ob_counter, ob in enumerate(obs):
        #this gets the observation information from the text file
        ob_info = mt.get_ob_info(ob)
        #make longitude positive-definite, i.e. -130 lon becomes 230 E
        if ob_info['lon'] < 0:
            ob_info['lon'] = ob_info['lon'] + 360
        utctime = datetime.utcfromtimestamp(ob_info['time'])
        #find the interpolated ob estimate if it passes the terrain check
        #(the terrain check is done within the closest_points function)
        interp, TorF = ef.closest_points(
            ob_info['lat'],
            ob_info['lon'],
            lats,
            lons,
            ob_info['elev'],
            elevs,
            utctime,
            statecls['validtime'].values,
            statecls.variables[netcdf_varnames[i]].values,
            need_interp=True)

        ob_id = ob_info['name']
        ob_value = ob_info['ob']

        #for stations which pass the terrain check (and were assimilated):
        if TorF:
            #calculate the squared error of the ensemble-mean estimate
            se = (np.mean(interp) - ob_value)**2
            #calculate the unbiased ensemble variance
            hx_variance_unbiased = np.var(interp, ddof=1)
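
The ye_rank computation above uses the double-argsort idiom: argsort once to get the sorting order, then argsort that order to get each element's rank. A self-contained illustration:

import numpy as np

vals = np.array([0.3, 1.2, 0.7])
order = np.argsort(vals)   #indices that would sort vals: [0, 2, 1]
ranks = np.argsort(order)  #rank of each element in place: [0, 2, 1]
#vals[0]=0.3 is the smallest (rank 0); vals[1]=1.2 is the largest (rank 2)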
Example #4
def run_efa(ob_type, update_var, ob_err_var):
    """
    All of the inputs are lists.
    """

    #print values used
    print('localization radius: ' + str(localize_radius))
    print('observation error variance: ' + str(ob_err_var))

    #assign the radius to the localization_radius variable in Observation
    #create a string for saving to specific directories
    if loc_type == 'GC':
        localize_type = 'GC'
        loc_rad = localize_radius
        loc_str = str(loc_rad)
    elif loc_type == 'hybrid':
        localize_type = 'GC'
        #set the localization string to be the special loc radius, plus hybrid (2000hybrid)
        loc_str = str(localize_radius) + loc_type

    elif loc_type.startswith('statsig'):
        localize_type = loc_type
        loc_rad = localize_radius
        loc_str = str(localize_radius) + loc_type

    #points within a radius of ob will be fully updated by covariance (not localized)
    elif loc_type == 'cutoff':
        localize_type = loc_type
        loc_rad = localize_radius
        loc_str = str(localize_radius) + loc_type

    else:
        #pass loc_type through so localize_type is always defined below
        localize_type = loc_type
        loc_rad = None
        loc_str = '_stat_sig'

    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')

    observations = []

    if loc_type == 'hybrid':
        #need to access 12-hour forecast IVT to determine presence of AR
        efaIVT = Load_Data(date,
                           ensemble_type,
                           variables,
                           'IVT', ['IVT'],
                           grid=grid,
                           new_format=new_format,
                           efh=efh)

        #load in the IVT ensemble state
        IVT_statecls, lats, lons, elevs = efaIVT.load_netcdfs()
        #get IVT 12 hours into the forecast, at time of observations
        #(time_ind = 2 is forecast hour 12)
        IVT = IVT_statecls.variables['IVT'].values[2, :, :, :]

        #obtain the mean of the ensemble (nlats x nlons)
        ens_mean = IVT.mean(axis=-1)

        #obtain variance of the ensemble (nlats x nlons)
        variance = np.var(IVT, axis=-1, ddof=1)

    #loop through each observation type
    for o, o_type in enumerate(ob_type):

        #if we want to use the ensemble variance as the ob error variance
        use_ens_var = ob_err_var[o].startswith('ensvar')

        efa = Load_Data(date,
                        ensemble_type,
                        variables,
                        o_type,
                        update_var,
                        grid=grid,
                        new_format=new_format,
                        efh=efh)
        #only need to load the netCDF once (first time through the obtype loop)
        if o == 0:
            #load the prior ensemble state from the netCDFs
            statecls, lats, lons, elevs = efa.load_netcdfs()

        #if we are assimilating MADIS observations
        if ob_category == 'madis':
            #load in the obs file
            obs = efa.load_obs()
        #if we are assimilating next-cycle 0-hour forecast gridded "observations"
        elif ob_category == 'gridded':
            #load in the obs file
            obs = efa.load_obs(forecast_hour=12,
                               madis=False,
                               variance=use_ens_var)

        #loop through each line in the text file (loop through each observation)
        for ob in obs:
            #this gets the observation information from the text file
            ob_dict = mt.get_ob_info(ob, use_ens_var)
            #make longitude positive-definite, i.e. -130 lon becomes 230 E
            if ob_dict['lon'] < 0:
                ob_dict['lon'] = ob_dict['lon'] + 360
            #get ob time in datetime format
            utctime = datetime.utcfromtimestamp(ob_dict['time'])
            if ob_category == 'madis':
                #check elevation of 4 nearest gridpoints to see whether or not to assimilate ob-
                #returns true or false
                TorF = ef.closest_points(ob_dict['lat'], ob_dict['lon'], lats,
                                         lons, ob_dict['elev'], elevs)
            elif ob_category == 'gridded':
                #no need to check elevation, since using gridded obs
                TorF = True

            #if we are using ens variance as ob error variance
            if use_ens_var:
                #multiply by the factor specified at the end of 'ensvar'
                mult_factor = ob_err_var[o].replace('ensvar', '')
                if mult_factor == '':
                    mult_factor = 1
                else:
                    mult_factor = float(mult_factor)
                print('multiplication factor: ', mult_factor)
                ob_var = ob_dict['variance'] * mult_factor
            else:
                ob_var = float(ob_err_var[o])

            #if we are using a hybrid localization radius- longer localization
            #radius within the AR, 1000 km outside of it
            #if the lat/lon lie within some AR box, check if the ob point is in an AR-
            #if IVT > 250
            if loc_type == 'hybrid':

                if 140 <= ob_dict['lon'] <= 245 and 33 <= ob_dict['lat'] <= 51:

                    #get the ensemble value at the lat/lon pair (ob_variance isn't used)
                    ob_value, ob_variance = ef.closest_points(
                        ob_dict['lat'],
                        ob_dict['lon'],
                        lats,
                        lons,
                        variable=ens_mean,
                        need_interp=True,
                        gen_obs=True,
                        variance=variance)
                    #if ob value is AR and in the grid area, set its localization radius to the input (hybrid) loc_rad
                    if ob_value >= 250:
                        loc_rad = localize_radius
                        #print(str(ob_value))
                    else:
                        loc_rad = 1000
                else:
                    #set the default loc_rad to be 1000
                    loc_rad = 1000

            #fill the observation class object with information for assimilation
            obser = Observation(value=ob_dict['ob'],
                                time=utctime,
                                lat=ob_dict['lat'],
                                lon=ob_dict['lon'],
                                obtype=o_type,
                                localize_radius=loc_rad,
                                assimilate_this=TorF,
                                error=ob_var)
            observations.append(obser)

    print('loaded ' + str(len(observations)) + ' obs for assimilation')

    # Put the state class object and observation objects into EnSRF object
    assimilator = EnSRF(statecls,
                        observations,
                        inflation=inflation,
                        loc=localize_type)

    # Update the prior with EFA; state is the posterior EnsembleState (post_obs isn't used)
    state, post_obs = assimilator.update()

    #---build the string of which directory to save the file--------------------------------
    outdir = '/home/disk/hot/stangen/Documents/posterior_ensembles/'
    #what kind of observations are we assimilating?
    outdir += ob_category + '/'

    #are the observations only updating their corresponding variable, or
    #are they updating all variables?
    if self_update:
        outdir += 'ob_update_self/'
    else:
        outdir += 'ob_update_all/'
    #directory for inflation used
    outdir += 'inf_' + inflation_str + '/'
    #directory for the type of localization used
    outdir += 'loc_' + loc_str + '/'
    #add directories for the ens type and year & month
    outdir += ensemble_type + '/' + y + m + '/'
    #----------------------------------------------------------------------------------------

    #Create output directories if they don't yet exist
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    #add initialization date to filename
    outdir_date_ens = outdir + y + '-' + m + '-' + d + '_' + h

    #new format string: add nhrs and if gridded, grid dimensions to filename
    if new_format:
        outdir_date_ens += '_' + str(efh) + 'hrs'
        if ob_category == 'gridded':
            outdir_date_ens += '_' + ef.var_string(grid)

    #convert the 1-length ob err var list to a string with no decimal point.
    #This lets multiple ob err vars acting on one data type all be saved to
    #one netCDF (easier file management when testing which observation error
    #variance to use): the ob err var is appended to the variable name in the
    #netCDF. ob_err_var_str is only used when self_update == True, so the ob
    #err var never appears in the netCDF variable names if self_update == False.
    ob_err_var_str = ''
    if use_oberrvar:
        ob_err_var_str = str(ob_err_var[0]).replace('.', '-')
    else:
        #we don't want the observation error variance in the filename
        #or in the variable names within the file
        ob_err_var = ''

    #if we are only updating the variable type that matches the observation type
    if self_update == True:
        for var in state.vars():
            checkfile = outdir_date_ens + '*'
            # If we're self-updating variables and are on the 2nd or later
            # variable in the loop, the glob will return the existing file
            existing_file = glob.glob(checkfile)

            #if another variable already exists, append to its netCDF
            if existing_file:
                print('Appending to existing file!')
                #should only be one file in there, convert from list to string
                existing_file = existing_file[0]

                # append to the existing file
                with Dataset(existing_file, 'a') as dset:
                    print('Writing variable {}'.format(var))
                    dset.createVariable(var + ob_err_var_str, np.float32, (
                        'time',
                        'lat',
                        'lon',
                        'ens',
                    ))
                    dset.variables[var +
                                   ob_err_var_str].units = ef.get_units(var)
                    dset.variables[var + ob_err_var_str][:] = state[var].values
                #if the filename already contains a .nc because we created the
                #file we are appending to separately from this run, delete
                #the .nc
                existing_file = existing_file.replace('.nc', '')
                # Rename the checkfile so the filename no longer specifies a
                # single variable type- add new variable to filename
                newfile = existing_file + '_' + ef.var_num_string(
                    ob_type, ob_err_var) + '.nc'
                os.system('mv {} {}'.format(existing_file + '.nc', newfile))
            else:
                # If the checkfile does not exist, make a new file
                outfile = outdir_date_ens + '_' + ef.var_num_string(
                    ob_type, ob_err_var) + '.nc'
                ef.make_netcdf(state, outfile, ob_err_var_str)

    #if we are updating all variable types with the observation (regardless of its type)
    #and use_oberrvar == True, observation error variance is not saved in the
    #variable name in the netCDF, but it is saved in the filename.
    else:
        outfile = outdir_date_ens + '_' + ef.var_num_string(
            ob_type, ob_err_var) + '.nc'
        ef.make_netcdf(state, outfile)
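
The 'ensvar' convention above (the string 'ensvar' means use the ensemble variance as the ob error variance, with an optional trailing multiplier such as 'ensvar2.5'; any other string is a fixed numeric variance) can be distilled into a small standalone helper. This is a sketch of the parsing logic only; the function name is an assumption, not part of the original source:

def parse_ob_err_var(spec, ens_variance):
    #hypothetical helper mirroring the 'ensvar' parsing in run_efa
    if spec.startswith('ensvar'):
        mult_factor = spec.replace('ensvar', '')
        mult_factor = 1.0 if mult_factor == '' else float(mult_factor)
        return ens_variance * mult_factor
    return float(spec)

parse_ob_err_var('ensvar2', 0.8)  #returns 1.6
parse_ob_err_var('1.0', 0.8)      #returns 1.0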
Example #5
def save_gridded_obs(self, forecast_hour=0, get_variance=False):
    """
    Loads the ensemble from n hours after the forecast was initialized
    and uses its 0-hour forecast as the "observation" grid.
    Saves information about the generated gridded obs to a .txt file,
    with the same format as MADIS observations.

    If get_variance is true, will also save the variance of the ensemble
    at the observation locations at the end of each line.
    """
    basedir = '/home/disk/hot/stangen/Documents/gridded_obs/'

    #get the list of points
    points = ef.get_ob_points(self.l, self.r, self.t, self.b, self.s)
    #get the 0-hour ensemble forecast, n hours after the model was initialized
    dt0 = self.date
    dt = dt0.replace(minute=0, second=0, microsecond=0)
    dt = dt + timedelta(hours=forecast_hour)

    #convert to epoch time for saving the observation
    epoch = str(dt.replace(tzinfo=pytz.utc).timestamp())

    dty, dtm, dtd, dth = ef.dt_str_timedelta(self.date, forecast_hour)

    print('starting saving of ' + self.ens_type + ' gridded ' + self.ob_type +
          ' "obs" at: ' + dty + dtm + dtd + '_' + dth + '00')

    var, lats, lons = self.load_ens_netcdf(forecast_hour)

    #obtain the mean of the ensemble (nlats x nlons)
    ens_mean = var.mean(axis=-1)

    #obtain the variance of the ensemble (nlats x nlons)
    variance = np.var(var, axis=-1, ddof=1)

    #initialize the obs list to append to
    obs = []
    #loop through each point (lat/lon pair)
    for i, p in enumerate(points):
        ob_lat = p[0]
        ob_lon = p[1]
        #get the ensemble value at the lat/lon pair
        ob_value, ob_variance = ef.closest_points(ob_lat, ob_lon, lats, lons,
                                                  variable=ens_mean,
                                                  need_interp=True,
                                                  gen_obs=True,
                                                  variance=variance)
        obs.append(str(i) + ',' + str(ob_lat) + ',' + str(ob_lon) + ',' +
                   str(0) + ',' + epoch + ',' + str(ob_value) + ',GRIDDED,' +
                   str(0))
        if get_variance:
            obs.append(',' + str(ob_variance))
        obs.append('\n')

    #save directory for the observations; create it if it doesn't yet exist
    savedir = basedir + self.ens_type + '/' + dty + dtm + '/' + self.ob_type + '/'
    if not os.path.isdir(savedir):
        os.makedirs(savedir)

    #build the filename and save the list of observations
    if not self.new_format:
        savestr = (savedir + self.ob_type + '_' + dty + dtm + dtd + '_' +
                   dth + '00.txt')
    else:
        savestr = (savedir + dty + dtm + dtd + '_' + dth + '00_' +
                   str(self.l) + '_' + str(self.r) + '_' + str(self.t) +
                   '_' + str(self.b) + '_' + str(self.s))
        if get_variance:
            savestr += '_variance'
        savestr += '.txt'
    with open(savestr, 'w') as f:
        for s in obs:
            f.write(s)
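
Each line written above has the comma-separated layout id,lat,lon,elev,epoch,value,GRIDDED,0 with the ensemble variance appended as a ninth field when get_variance is true. A minimal sketch of reading one line back; the function name and dictionary keys mirror mt.get_ob_info but are assumptions, not the original parser:

def parse_gridded_ob(line):
    #split one written line back into its comma-separated fields
    fields = line.strip().split(',')
    ob = {'name': fields[0],
          'lat': float(fields[1]),
          'lon': float(fields[2]),
          'elev': float(fields[3]),
          'time': float(fields[4]),
          'ob': float(fields[5])}
    #variance is the optional 9th field (written when get_variance is true)
    if len(fields) > 8:
        ob['variance'] = float(fields[8])
    return ob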