def set_optim_nstns_tair_norm(stnda, path_xval_ds):
    '''
    Set the local optimal number of stations to be used for monthly normal
    interpolation for each U.S. climate division based on cross-validation
    mean absolute error.

    For every month a station variable (named by get_optim_varname) is added
    to the database. For every finite climate division found in the station
    metadata, the division's cross-validation file is read and the neighbor
    count with the lowest mean MAE is written to all stations of that
    division.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the
        database for which the local optimal number of
        neighbors should be set.
    path_xval_ds : str
        Path where netCDF cross-validation MAE files from
        create_climdiv_optim_nstns_db are located
    '''

    climdiv_stns = stnda.stns[CLIMDIV]

    # One output station variable per month, keyed by month number (1-12).
    vars_optim = {}
    for mth in np.arange(1, 13):

        varname_optim = get_optim_varname(mth)
        long_name = "Optimal number of neighbors to use for monthly normal interpolation for month %d" % mth
        var_optim = stnda.add_stn_variable(
            varname_optim, long_name, "", 'f8',
            fill_value=netCDF4.default_fillvals['f8'])
        vars_optim[mth] = var_optim

    # Stations with a non-finite climate division are skipped entirely.
    divs = np.unique(climdiv_stns[np.isfinite(climdiv_stns)])

    stchk = StatusCheck(divs.size, 10)

    for clim_div in divs:

        fpath = os.path.join(
            path_xval_ds,
            "optim_nstns_%s_climdiv%d.nc" % (stnda.var_name, clim_div))
        ds_climdiv = Dataset(fpath)

        # Read everything needed up front and always release the file handle;
        # otherwise one open netCDF file is leaked per climate division.
        try:
            mae_climdiv = ds_climdiv.variables['mae'][:]
            nnghs_climdiv = ds_climdiv.variables['min_nghs'][:]
        finally:
            ds_climdiv.close()

        # Indices of the stations that belong to this climate division.
        climdiv_mask = np.nonzero(climdiv_stns == clim_div)[0]

        for mth in np.arange(1, 13):

            # First axis of 'mae' is indexed by month (0-based).
            mae_climdiv_mth = mae_climdiv[mth - 1, :, :]
            # Average over the last axis (presumably stations - TODO confirm)
            # so that there is one mean MAE per candidate neighbor count.
            mmae = np.mean(mae_climdiv_mth, axis=1)
            min_idx = np.argmin(mmae)
            vars_optim[mth][climdiv_mask] = nnghs_climdiv[min_idx]

        stchk.increment()

    # Flush the newly written station variables to disk.
    stnda.ds.sync()
def __get_nnghs(self, pt, mth, stns_rm=None):
    '''
    Determine the number of neighboring stations to use for a point and
    month as the weighted average of the optimal neighbor counts stored
    on the surrounding stations.

    Parameters
    ----------
    pt : structured array
        The point; only its LAT and LON fields are read here.
    mth : int
        The specific month as an integer (1-12)
    stns_rm : ndarray or str, optional
        Passed through to the station selector as stns_rm
        (stations that should not be considered neighbors).

    Returns
    ----------
    nnghs : int
        The rounded, weighted-average optimal number of neighbors.

    Raises
    ----------
    Exception
        If none of the initially selected neighbors has a finite
        optimal neighbor count for the month.
    '''

    optim_varname = get_optim_varname(mth)

    # Initial, fixed-size neighborhood used only to look up the
    # per-station optimal neighbor counts.
    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], DFLT_INIT_NNGHS,
                               load_obs=False, stns_rm=stns_rm)

    # Keep only neighbors that actually have an optimal count set.
    indomain_mask = np.isfinite(self.stn_slct.ngh_stns[optim_varname])
    domain_stns = self.stn_slct.ngh_stns[indomain_mask]

    if domain_stns.size == 0:
        raise Exception(
            "Cannot determine the optimal # of neighbors to use!")

    n_wgt = self.stn_slct.ngh_wgt[indomain_mask]

    # Builtin int: the np.int alias was removed in NumPy 1.24.
    nnghs = int(
        np.round(np.average(domain_stns[optim_varname], weights=n_wgt)))

    return nnghs
def _get_rgn_nnghs_dict(stns):
    '''
    Build a region -> month -> neighbor-count lookup from station metadata.

    Parameters
    ----------
    stns : structured array
        Station records providing the CLIMDIV field and, for each month,
        the fields named by get_optim_varname and get_optim_anom_varname.

    Returns
    ----------
    nnghs_by_rgn : dict
        Keyed by each finite CLIMDIV value. Each value is a dict keyed by
        month (1-12) holding a 2-tuple: the values of the
        get_optim_varname and get_optim_anom_varname fields taken from the
        first station found in that region.
    '''

    def _first_value(varname, in_rgn):
        # Value of the field for the first station flagged by the mask.
        return stns[varname][in_rgn][0]

    mths = np.arange(1, 13)
    finite_divs = stns[CLIMDIV][np.isfinite(stns[CLIMDIV])]

    nnghs_by_rgn = {}

    for rgn in np.unique(finite_divs):

        in_rgn = stns[CLIMDIV] == rgn

        nnghs_by_rgn[rgn] = {
            mth: (_first_value(get_optim_varname(mth), in_rgn),
                  _first_value(get_optim_anom_varname(mth), in_rgn))
            for mth in mths
        }

    return nnghs_by_rgn
def build_empty_pt():
    '''
    Create a single point record with all fields needed for interpolation.

    The record has float64 fields, in this order: LON, LAT, ELEV, TDI,
    CLIMDIV, MASK, tmin01-tmin12, tmax01-tmax12, followed by the monthly
    fields named by get_norm_varname, get_optim_varname, get_lst_varname
    and get_optim_anom_varname.

    Returns
    ----------
    a_pt : structured array record
        The first (and only) element of a length-1 structured array
        allocated with np.empty, so field values are uninitialized and
        must be set by the caller.
    '''

    mths = np.arange(1, 13)

    # Assemble the field names first; every field shares the same type.
    field_names = [LON, LAT, ELEV, TDI, CLIMDIV, MASK]
    field_names += ["tmin%02d" % mth for mth in mths]
    field_names += ["tmax%02d" % mth for mth in mths]

    monthly_namers = (get_norm_varname, get_optim_varname,
                      get_lst_varname, get_optim_anom_varname)
    for namer in monthly_namers:
        field_names += [namer(mth) for mth in mths]

    pt_dtype = [(name, np.float64) for name in field_names]

    return np.empty(1, dtype=pt_dtype)[0]
def get_krig_params(self, pt, mth, rm_stnid=None):
    '''
    Get the moving window regression kriging variogram parameters
    for a specific point and month. Currently assumes an exponential
    variogram

    Parameters
    ----------
    pt : structured array
        A structured array containing the point's latitude, longitude,
        elevation, topographic dissection index, and average land skin
        temperatures for each month. An empty point can be initialized
        with build_empty_pt()
    mth : int
        The specific month as an integer (1-12)
    rm_stnid : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point.

    Returns
    ----------
    nug : float
        Exponential variogram nugget.
    psill : float
        Exponential variogram partial sill.
    rng : float
        Exponential variogram range.

    Raises
    ----------
    Exception
        If none of the initially selected neighbors has a finite
        optimal neighbor count for the month.
    '''

    optim_varname = get_optim_varname(mth)

    # First determine the nnghs to use based on smoothed weighted average of
    # the optimal nnghs bandwidth at each station point.
    # rm_stnid is forwarded so excluded stations cannot be selected as
    # neighbors (stns_rm=None leaves the selection unrestricted).
    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], DFLT_INIT_NNGHS,
                               load_obs=False, stns_rm=rm_stnid)

    # Keep only neighbors that actually have an optimal count set.
    indomain_mask = np.isfinite(self.stn_slct.ngh_stns[optim_varname])
    domain_stns = self.stn_slct.ngh_stns[indomain_mask]

    if domain_stns.size == 0:
        raise Exception(
            "Cannot determine the optimal # of neighbors to use!")

    n_wgt = self.stn_slct.ngh_wgt[indomain_mask]

    # Builtin int: the np.int alias was removed in NumPy 1.24.
    nnghs = int(
        np.round(np.average(domain_stns[optim_varname], weights=n_wgt)))

    # Now use the optimal nnghs to get the krig params for this mth
    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], nnghs,
                               load_obs=False, stns_rm=rm_stnid)

    nghs = self.stn_slct.ngh_stns

    # Wrap the neighbor attributes as R float vectors for self.r_func.
    ngh_lon = ri.FloatSexpVector(nghs[LON])
    ngh_lat = ri.FloatSexpVector(nghs[LAT])
    ngh_elev = ri.FloatSexpVector(nghs[ELEV])
    ngh_tdi = ri.FloatSexpVector(nghs[TDI])
    ngh_lst = ri.FloatSexpVector(nghs[get_lst_varname(mth)])
    ngh_tair = ri.FloatSexpVector(nghs[get_norm_varname(mth)])
    ngh_wgt = ri.FloatSexpVector(self.stn_slct.ngh_wgt)
    ngh_dists = ri.FloatSexpVector(self.stn_slct.ngh_dists)

    rslt = self.r_func(ngh_lon, ngh_lat, ngh_elev, ngh_tdi, ngh_lst,
                       ngh_tair, ngh_wgt, ngh_dists)

    # The result is read positionally: nugget, partial sill, range.
    nug = rslt[0]
    psill = rslt[1]
    rng = rslt[2]

    return nug, psill, rng
def interp_pt(self, fix_invalid=True, stns_rm=None):
    '''
    Interpolate daily and monthly normal Tmin and Tmax values
    for the current PtInterpTair.a_pt

    Parameters
    ----------
    fix_invalid : boolean, optional
        If True, pass the interpolated daily series through
        tmin_tmax_fixer and, when it reports any fixed days, recompute
        the monthly normals from the fixed daily values.
        NOTE(review): presumably this corrects days where Tmin is not
        below Tmax - confirm against tmin_tmax_fixer. Default: True.
    stns_rm : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point.

    Returns
    ----------
    tmin_dly : ndarray
        Daily interpolated Tmin
    tmax_dly : ndarray
        Daily interpolated Tmax
    tmin_norms : ndarray
        Interpolated monthly Tmin normals
    tmax_norms : ndarray
        Interpolated monthly Tmax normals
    tmin_se : ndarray
        Kriging standard error for monthly Tmin normals
    tmax_se : ndarray
        Kriging standard error for monthly Tmax normals
    ninvalid : int
        The number of days reported as fixed by tmin_tmax_fixer.
        If fix_invalid is False, will be set to 0.
    '''

    mths = np.arange(1, 13)

    def _prime_pt(lst_field_fmt, nngh_params):
        # Set the monthly lst values and optim nnghs on the point
        for mth in mths:
            self.a_pt[get_lst_varname(mth)] = self.a_pt[lst_field_fmt % mth]
            n_norm, n_anom = nngh_params[self.a_pt[CLIMDIV]][mth]
            self.a_pt[get_optim_varname(mth)] = n_norm
            self.a_pt[get_optim_anom_varname(mth)] = n_anom

    def _recalc_norms(dly):
        # Daily values -> per year-month means -> per calendar-month means,
        # restricted to the days selected by daysNormMask.
        dly_norm = np.take(dly, self.daysNormMask)
        mthly = np.array([np.mean(np.take(dly_norm, yr_mth_idx))
                          for yr_mth_idx in self.yrMthsMasks])
        return np.array([np.mean(np.take(mthly, mth_idx))
                         for mth_idx in self.mth_masks])

    # Perform Tmin interpolation
    _prime_pt("tmin%02d", self.nnghparams_tmin)
    tmin_dly, tmin_norms, tmin_se = self.interp_tmin.interp(
        self.a_pt, stns_rm=stns_rm)

    # Perform Tmax interpolation; the point is re-primed because both
    # variables share the same lst / optim fields on a_pt.
    _prime_pt("tmax%02d", self.nnghparams_tmax)
    tmax_dly, tmax_norms, tmax_se = self.interp_tmax.interp(
        self.a_pt, stns_rm=stns_rm)

    ninvalid = 0
    if fix_invalid:
        tmin_dly, tmax_dly, ninvalid = tmin_tmax_fixer(tmin_dly, tmax_dly)

    if ninvalid > 0:
        # Daily values changed, so the normals must be rebuilt from them.
        tmin_norms = _recalc_norms(tmin_dly)
        tmax_norms = _recalc_norms(tmax_dly)

    return (tmin_dly, tmax_dly, tmin_norms, tmax_norms,
            tmin_se, tmax_se, ninvalid)