def gwr_mth(self, pt, mth, nnghs=None, stns_rm=None):
    '''
    Run geographically weighted regression of daily temperature anomalies
    for a specific month and point location. The function interpolates
    daily anomalies using GWR and then adds the anomalies to the point's
    monthly normal and returns the actual daily values.

    Parameters
    ----------
    pt : structured array
        A structured array containing the point's latitude, longitude,
        elevation, topographic dissection index, and average land skin
        temperatures for each month. An empty point can be initialized
        with build_empty_pt(). The point's monthly normal for mth must
        already be set because it is added back to the interpolated
        anomalies.
    mth : int
        The month to interpolate. It selects the monthly normal variable,
        the entry of self.mthly_predictors and the observations that are
        loaded for the neighboring stations.
    nnghs : int, optional
        The number of closest neighboring stations to use for the GWR.
        If None, nnghs will be set to the optimized number of GWR
        neighbors for the point's region.
    stns_rm : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point.

    Returns
    ----------
    interp_vals : ndarray
        A 1-D array containing the GWR interpolated actual daily values
        for the specified month.
    '''
    # Identity test: "== None" is unidiomatic and would compare elementwise
    # if an array were ever passed.
    if nnghs is None:
        # Get the nnghs to use from the optimal values
        # at surrounding stations
        nnghs = self.__get_nnghs(pt, mth, stns_rm)

    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=True,
                               stns_rm=stns_rm, obs_mth=mth)

    ngh_obs = self.stn_slct.ngh_obs
    ngh_stns = self.stn_slct.ngh_stns
    ngh_wgt = self.stn_slct.ngh_wgt

    # Center each neighbor's observations on its own monthly normal so the
    # regression is run on anomalies.
    ngh_obs_cntr = ngh_obs - ngh_stns[get_norm_varname(mth)]

    # Perform a GWR for each day
    predictors = self.mthly_predictors[mth]
    X = np.column_stack([ngh_stns[avar] for avar in predictors])
    x = np.array([pt[avar] for avar in predictors])

    interp_anom = _gwr_series(X, x, ngh_obs_cntr, ngh_wgt)

    # Add interpolated anomalies to monthly norm to get actual values
    interp_vals = interp_anom + pt[get_norm_varname(mth)]

    return interp_vals
def run_xval(self, stn_id, abw_nngh):
    '''
    Leave-one-out cross validation of the interpolated monthly normals
    for one station over several neighbor bandwidths.

    Parameters
    ----------
    stn_id : str
        Station id for cross validation
    abw_nngh : ndarray
        A 1-D array of different neighbor station bandwidths
        for which to run cross validation.

    Returns
    ----------
    err : ndarray
        A 12 * N array of cross validation errors (modeled - observed),
        one row per month and one column per bandwidth in abw_nngh.
    '''
    stn = self.stn_da.stns[self.stn_da.stn_idxs[stn_id]]

    # Observed monthly normals of the held-out station
    obs_norms = np.array([stn[get_norm_varname(mth)]
                          for mth in np.arange(1, 13)])

    err = np.zeros((12, abw_nngh.size))

    for col, bw_nngh in enumerate(abw_nngh):
        # The station itself is excluded from its own neighborhood
        modeled = self.krig.krigall(stn, bw_nngh, stns_rm=stn[STN_ID])
        err[:, col] = modeled - obs_norms

    return err
def proc_write(fpath_stndb, elem, fpath_out, nwrkers):
    '''
    Writer process: create the output station database and store the
    results sent by the worker processes until all of them are done.

    Parameters
    ----------
    fpath_stndb : str
        Path of the input station database. It is only read to obtain the
        station ids, station metadata and days, and is closed afterwards.
    elem : str
        Element key. Used to open the databases, to look up DB_VARIABLES
        and as the name of the daily output variable.
    fpath_out : str
        Path of the output station netCDF database to create.
    nwrkers : int
        Number of worker processes. The writer stops once it has received
        this many messages tagged TAG_STOPWORK.

    Returns
    ----------
    int
        0 once all workers have reported that they are finished.
    '''
    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationSerialDataDb(fpath_stndb, elem)
    stn_ids = stn_da.stn_ids
    stns = stn_da.stns
    stn_mask = np.logical_and(np.isfinite(stns[MASK]), np.isnan(stns[BAD]))
    days = stn_da.days
    stn_da.ds.close()
    stn_da = None

    print("WRITER: Creating output station netCDF database...")
    create_quick_db(fpath_out, stns, days, DB_VARIABLES[elem])
    stnda_out = StationSerialDataDb(fpath_out, elem, mode='r+')

    # The output dataset is opened for writing; make sure it is closed
    # whether the writer finishes normally or fails part way through.
    try:

        mth_names = []
        for mth in np.arange(1, 13):

            norm_var_name = get_norm_varname(mth)
            stnda_out.add_stn_variable(
                norm_var_name, '', units='C', dtype='f8',
                fill_value=netCDF4.default_fillvals['f8'])
            mth_names.append(norm_var_name)

        stnda_out.ds.sync()

        print("WRITER: Output station netCDF database created.")

        stat_chk = StatusCheck(np.sum(stn_mask), 50)

        while True:

            stn_id, tair_daily, tair_norms = MPI.COMM_WORLD.recv(
                source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

            if status.tag == TAG_STOPWORK:

                nwrkrs_done += 1
                if nwrkrs_done == nwrkers:
                    print("WRITER: Finished")
                    return 0

                continue

            # Column of this station in the output database
            x = np.nonzero(stn_ids == stn_id)[0][0]
            stnda_out.ds.variables[elem][:, x] = tair_daily

            for i, mth_name in enumerate(mth_names):
                stnda_out.ds.variables[mth_name][x] = tair_norms[i]

            # Flush after every station so finished results are on disk
            stnda_out.ds.sync()

            stat_chk.increment()

    finally:

        stnda_out.ds.close()
def run_xval(self, stn_id, a_nnghs):
    '''
    Leave-one-out cross validation of the GWR daily anomaly interpolation
    for one station over several numbers of neighbors.

    Parameters
    ----------
    stn_id : str
        Station id for cross validation
    a_nnghs : ndarray
        Array of the numbers of neighboring stations to evaluate.

    Returns
    ----------
    biasAll : ndarray
        Mean of (interpolated - observed) anomalies; one row per entry
        of a_nnghs and one column per month.
    maeAll : ndarray
        Mean absolute error, same layout as biasAll.
    r2All : ndarray
        Squared correlation coefficient between interpolated and observed
        anomalies, same layout as biasAll.
    '''
    stn = self.stn_da.stns[self.stn_da.stn_idxs[stn_id]]
    stn_obs = self.stn_da.load_obs(stn[STN_ID])

    out_shape = (a_nnghs.size, 12)
    bias_all = np.zeros(out_shape)
    mae_all = np.zeros(out_shape)
    r2_all = np.zeros(out_shape)

    for row in range(a_nnghs.size):

        nnghs = a_nnghs[row]

        for mth in np.arange(1, 13):

            norm_name = get_norm_varname(mth)
            col = mth - 1

            obs_anom = stn_obs[self.stn_da.mth_idx[mth]] - stn[norm_name]

            # The station itself is excluded from its own neighborhood
            interp_tair = self.gwr.gwr_mth(stn, mth, nnghs,
                                           stns_rm=stn[STN_ID])
            interp_anom = interp_tair - stn[norm_name]

            difs = interp_anom - obs_anom

            bias_all[row, col] = np.mean(difs)
            mae_all[row, col] = np.mean(np.abs(difs))

            # r-squared value; variance explained
            r_value = stats.linregress(interp_anom, obs_anom)[2]
            r2_all[row, col] = r_value ** 2

    return bias_all, mae_all, r2_all
def krigall(self, pt, nnghs, stns_rm=None):
    '''
    Interpolate the monthly temperature normals of all 12 months to a
    single point location with moving window variogram fitting and
    regression kriging.

    Parameters
    ----------
    pt : structured array
        A structured array containing the point's latitude, longitude,
        elevation, topographic dissection index, and average land skin
        temperatures for each month. An empty point can be initialized
        with build_empty_pt()
    nnghs : int
        The number of neighboring stations to use.
    stns_rm : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point.

    Returns
    ----------
    interp_norms : ndarray
        A 1-D array of size 12 containing the interpolated monthly normals
    '''
    slct = self.stn_slct
    slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=False,
                      stns_rm=stns_rm)

    stns = slct.ngh_stns
    as_r = ri.FloatSexpVector

    # Neighbor attributes that are the same for every month
    r_lon = as_r(stns[LON])
    r_lat = as_r(stns[LAT])
    r_elev = as_r(stns[ELEV])
    r_tdi = as_r(stns[TDI])
    r_wgt = as_r(slct.ngh_wgt)
    r_dists = as_r(slct.ngh_dists)

    interp_norms = np.zeros(12)

    for mth in np.arange(1, 13):

        # Month-specific neighbor predictors/normals and the target point
        r_lst = as_r(stns[get_lst_varname(mth)])
        r_tair = as_r(stns[get_norm_varname(mth)])
        r_pt = as_r((pt[LON], pt[LAT], pt[ELEV], pt[TDI],
                     pt[get_lst_varname(mth)]))

        rslt = self.r_func(r_pt, r_lon, r_lat, r_elev, r_tdi, r_lst,
                           r_tair, r_wgt, r_dists)

        interp_norms[mth - 1] = rslt[0]

    return interp_norms
def gwr_mth(self, pt, mth, nnghs=None, stns_rm=None):
    '''
    Interpolate daily values for one month at a point by running the R
    function gwr_anomaly on the neighboring stations' daily anomalies and
    adding the point's monthly normal to the interpolated anomalies.

    Parameters
    ----------
    pt : structured array
        The point to interpolate to. Its longitude, latitude, elevation,
        topographic dissection index, land skin temperature for mth and
        monthly normal for mth are read.
    mth : int
        The month to interpolate.
    nnghs : int, optional
        The number of neighboring stations to use. If None, the value is
        obtained from the optimal values at surrounding stations.
    stns_rm : ndarray or str, optional
        Station id(s) that should not be considered neighbors; passed on
        to the station selection.

    Returns
    ----------
    interp_vals : ndarray
        1-D array of the interpolated anomalies plus the point's monthly
        normal.
    '''
    # Identity test: "== None" is unidiomatic and would compare elementwise
    # if an array were ever passed.
    if nnghs is None:
        # Get the nnghs to use from the optimal values at surrounding stations
        nnghs = self._GwrTairAnom__get_nnghs(pt, mth, stns_rm)

    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=True,
                               stns_rm=stns_rm, obs_mth=mth)

    ngh_obs = self.stn_slct.ngh_obs
    ngh_stns = self.stn_slct.ngh_stns
    ngh_wgt = self.stn_slct.ngh_wgt

    # Center each neighbor's observations on its own monthly normal
    ngh_obs_cntr = ngh_obs - ngh_stns[get_norm_varname(mth)]

    a_pt = np.array([pt[LON], pt[LAT], pt[ELEV], pt[TDI],
                     pt[get_lst_varname(mth)]])

    rslt = r.gwr_anomaly(
        robjects.FloatVector(ngh_stns[LON]),
        robjects.FloatVector(ngh_stns[LAT]),
        robjects.FloatVector(ngh_stns[ELEV]),
        robjects.FloatVector(ngh_stns[TDI]),
        robjects.FloatVector(ngh_stns[get_lst_varname(mth)]),
        robjects.FloatVector(ngh_wgt),
        robjects.Matrix(ngh_obs_cntr),
        robjects.FloatVector(a_pt))

    # Only the point anomalies are needed here. The result also carries a
    # 'fit_anom' matrix, which is deliberately not copied out of R because
    # nothing in this method uses it.
    interp_anom = np.array(rslt.rx('pt_anom')).ravel()

    interp_vals = interp_anom + pt[get_norm_varname(mth)]

    return interp_vals
def interp(self, pt, stns_rm=None):
    '''
    Interpolate monthly temperature normals and daily temperatures
    to a single point location.

    Parameters
    ----------
    pt : structured array
        A structured array containing the point's latitude, longitude,
        elevation, topographic dissection index, and average land skin
        temperatures for each month. An empty point can be initialized
        with build_empty_pt(). The point's monthly normal fields are
        overwritten with the interpolated normals.
    stns_rm : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point.

    Returns
    ----------
    tair_daily : ndarray
        A 1-D array of interpolated daily temperatures.
    tair_norms : ndarray
        A 1-D array of size 12 with the interpolated monthly
        temperature normals.
    tair_se : ndarray
        A 1-D array of size 12 with the kriging standard errors
        for the interpolated monthly temperature normals.
    '''
    daily = np.zeros(self.ndays)
    norms = np.zeros(12)
    std_errs = np.zeros(12)

    for mth in np.arange(1, 13):

        i = mth - 1

        norm_mean, norm_var = self.krig_tair.krig(pt, mth, stns_rm=stns_rm)
        se, _ci = self.krig_tair.std_err_ci(norm_mean, norm_var)

        # The normal has to be stored on the point before the GWR runs,
        # because gwr_mth is handed the point and the month.
        pt[get_norm_varname(mth)] = norm_mean
        norms[i] = norm_mean
        std_errs[i] = se

        daily[self.mth_masks[mth]] = self.gwr_tair.gwr_mth(
            pt, mth, stns_rm=stns_rm)

    return daily, norms, std_errs
def build_empty_pt():
    '''
    Build an empty point record.

    Returns
    ----------
    pt : structured array element
        A single record whose fields are all float64: the fixed point
        attributes followed by 12 monthly fields each for tmin, tmax,
        normals, optimal nnghs, land skin temperature and optimal anomaly
        nnghs. The record is allocated with np.empty, so the field values
        are uninitialized.
    '''
    months = np.arange(1, 13)

    # Fixed, non-monthly attributes come first
    fields = [(name, np.float64)
              for name in (LON, LAT, ELEV, TDI, CLIMDIV, MASK)]

    # One group of 12 monthly fields per name builder, in this order
    monthly_namers = (
        lambda mth: "tmin%02d" % mth,
        lambda mth: "tmax%02d" % mth,
        get_norm_varname,
        get_optim_varname,
        get_lst_varname,
        get_optim_anom_varname,
    )

    for namer in monthly_namers:
        fields.extend([(namer(mth), np.float64) for mth in months])

    return np.empty(1, dtype=fields)[0]
def gwr_predict(self, pt, mth, nnghs=None, stns_rm=None):
    '''
    Predict the temperature normal of one month at a point by passing the
    neighboring stations and the point to self.r_gwr_func.

    Parameters
    ----------
    pt : structured array
        The point to predict for. Its longitude, latitude, elevation,
        topographic dissection index and land skin temperature for mth
        are read.
    mth : int
        The month to predict.
    nnghs : int, optional
        The number of neighboring stations to use. If None, the value is
        obtained from the optimal values at surrounding stations.
    stns_rm : ndarray or str, optional
        Station id(s) that should not be considered neighbors; passed on
        to the station selection.

    Returns
    ----------
    tair_mean : float
        First element of the R result.
    tair_var : float
        Second element of the R result.
    '''
    if nnghs is None:
        # Get the nnghs to use from the optimal values at surrounding stations
        nnghs = self.__get_nnghs(pt, mth, stns_rm)

    slct = self.stn_slct
    slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=False,
                      stns_rm=stns_rm)

    stns = slct.ngh_stns
    as_r = ri.FloatSexpVector
    lst_name = get_lst_varname(mth)

    r_pt = as_r((pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[lst_name]))

    rslt = self.r_gwr_func(as_r(stns[LON]),
                           as_r(stns[LAT]),
                           as_r(stns[ELEV]),
                           as_r(stns[TDI]),
                           as_r(stns[lst_name]),
                           as_r(stns[get_norm_varname(mth)]),
                           as_r(slct.ngh_wgt),
                           r_pt)

    tair_mean, tair_var, bad_interp = rslt[0], rslt[1], rslt[2]

    # A non-zero third element is reported but the values are still returned
    if bad_interp != 0:
        print("ERROR: " + str(bad_interp) + " bad interp: " + str(pt))

    return tair_mean, tair_var
def __init__(self, stn_da):
    '''
    Parameters
    ----------
    stn_da : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the
        database from which observations will be loaded.
    '''
    self.stn_da = stn_da

    # Only stations whose BAD field is NaN are eligible as neighbors
    self.stn_slct = StationSelect(stn_da,
                                  stn_mask=np.isnan(stn_da.stns[BAD]),
                                  rm_zero_dist_stns=True)

    months = np.arange(1, 13)
    self.vnames_norm = [get_norm_varname(mth) for mth in months]
    self.vnames_lst = [get_lst_varname(mth) for mth in months]

    # Station table indexed by station id
    df = pd.DataFrame(stn_da.stns)
    df.index = df[STN_ID]
    self.df_stns = df

    # Calculate annual means for monthly LST and Tair normals
    df['lst'] = df[self.vnames_lst].mean(axis=1)
    df['norm'] = df[self.vnames_norm].mean(axis=1)
def krig(self, pt, mth, nnghs=None, vario_params=None, stns_rm=None):
    '''
    Interpolate the temperature normal of one month to a single point
    location with moving window regression kriging.

    Parameters
    ----------
    pt : structured array
        A structured array containing the point's latitude, longitude,
        elevation, topographic dissection index, and average land skin
        temperatures for each month. An empty point can be initialized
        with build_empty_pt()
    mth : int
        The month for which to interpolate a temperature normal
    nnghs : int, optional
        The number of neighboring stations to use. If not provided,
        nnghs will be determined from the previously determined optimal
        number at surrounding neighboring stations.
    vario_params : tuple, optional
        A tuple of size 3 (nugget, sill, range). If not provided, the
        variogram parameters will be determined from previously fit
        variogram parameters at neighboring stations
    stns_rm : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point.

    Returns
    ----------
    tair_mean : float
        The interpolated temperature normal.
    tair_var : float
        The kriging prediction variance for the temperature normal.
    '''
    if nnghs is None:
        # Get the nnghs to use from the optimal values at surrounding stations
        nnghs = self.__get_nnghs(pt, mth, stns_rm)

    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=False,
                               stns_rm=stns_rm)

    # Variogram parameters are resolved after the neighbors are selected
    # and before the neighbor arrays are read.
    if vario_params is not None:
        nug, psill, vrange = vario_params
    else:
        nug, psill, vrange = self.__get_vario_params(pt, mth)

    stns = self.stn_slct.ngh_stns
    as_r = ri.FloatSexpVector
    lst_name = get_lst_varname(mth)

    r_lon = as_r(stns[LON])
    r_lat = as_r(stns[LAT])
    r_elev = as_r(stns[ELEV])
    r_tdi = as_r(stns[TDI])
    r_lst = as_r(stns[lst_name])
    r_tair = as_r(stns[get_norm_varname(mth)])

    r_pt = as_r((pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[lst_name]))

    # Scalars are passed to R as length-1 vectors
    r_nug = as_r([nug])
    r_psill = as_r([psill])
    r_vrange = as_r([vrange])

    r_wgt = as_r(self.stn_slct.ngh_wgt)

    rslt = self.r_krig_func(r_lon, r_lat, r_elev, r_tdi, r_lst, r_tair,
                            r_wgt, r_pt, r_nug, r_psill, r_vrange)

    tair_mean, tair_var, bad_interp = rslt[0], rslt[1], rslt[2]

    # A non-zero third element is reported but the values are still returned
    if bad_interp != 0:
        print("ERROR: " + str(bad_interp) + " bad interp: " + str(pt))

    return tair_mean, tair_var
def get_krig_params(self, pt, mth, rm_stnid=None):
    '''
    Get the moving window regression kriging variogram parameters for a
    specific point and month. Currently assumes an exponential variogram

    Parameters
    ----------
    pt : structured array
        A structured array containing the point's latitude, longitude,
        elevation, topographic dissection index, and average land skin
        temperatures for each month. An empty point can be initialized
        with build_empty_pt()
    mth : int
        The specific month as an integer (1-12)
    rm_stnid : ndarray or str, optional
        An array of station ids or a single station id for stations that
        should not be considered neighbors for the specific point. It is
        applied to both neighbor selections made by this method.

    Returns
    ----------
    nug : float
        Exponential variogram nugget.
    psill : float
        Exponential variogram partial sill.
    rng : float
        Exponential variogram range.

    Raises
    ----------
    Exception
        If none of the initially selected neighbors has a finite optimal
        number of neighbors for the month.
    '''
    optim_name = get_optim_varname(mth)

    # First determine the nnghs to use based on smoothed weighted average of
    # the optimal nnghs bandwidth at each station point.
    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], DFLT_INIT_NNGHS,
                               load_obs=False, stns_rm=rm_stnid)

    indomain_mask = np.isfinite(self.stn_slct.ngh_stns[optim_name])
    domain_stns = self.stn_slct.ngh_stns[indomain_mask]

    if domain_stns.size == 0:
        raise Exception("Cannot determine the optimal # of neighbors to use!")

    n_wgt = self.stn_slct.ngh_wgt[indomain_mask]

    # Builtin int: np.int was only an alias for it and no longer exists in
    # current NumPy releases.
    nnghs = int(np.round(np.average(domain_stns[optim_name],
                                    weights=n_wgt)))

    # Now use the optimal nnghs to get the krig params for this mth
    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=False,
                               stns_rm=rm_stnid)

    nghs = self.stn_slct.ngh_stns

    ngh_lon = ri.FloatSexpVector(nghs[LON])
    ngh_lat = ri.FloatSexpVector(nghs[LAT])
    ngh_elev = ri.FloatSexpVector(nghs[ELEV])
    ngh_tdi = ri.FloatSexpVector(nghs[TDI])
    ngh_lst = ri.FloatSexpVector(nghs[get_lst_varname(mth)])
    ngh_tair = ri.FloatSexpVector(nghs[get_norm_varname(mth)])
    ngh_wgt = ri.FloatSexpVector(self.stn_slct.ngh_wgt)
    ngh_dists = ri.FloatSexpVector(self.stn_slct.ngh_dists)

    rslt = self.r_func(ngh_lon, ngh_lat, ngh_elev, ngh_tdi, ngh_lst,
                       ngh_tair, ngh_wgt, ngh_dists)

    nug = rslt[0]
    psill = rslt[1]
    rng = rslt[2]

    return nug, psill, rng