Esempio n. 1
0
def set_optim_nstns_tair_norm(stnda, path_xval_ds):
    '''
    Set the local optimal number of stations to be used for monthly
    normal interpolation for each U.S. climate division based on
    cross-validation mean absolute error.

    For every month a station variable (named by get_optim_varname) is
    added to the database. For every climate division, the value of
    'min_nghs' with the lowest mean 'mae' for that month is written to
    all stations that belong to the division.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the
        database for which the local optimal number of
        neighbors should be set.
    path_xval_ds : str
        Path where netCDF cross-validation MAE files from
        create_climdiv_optim_nstns_db are located. One file named
        "optim_nstns_<var_name>_climdiv<division>.nc" is read per
        climate division.
    '''

    climdiv_stns = stnda.stns[CLIMDIV]

    # Create one station variable per month to hold the optimal
    # number of neighbors.
    vars_optim = {}
    for mth in np.arange(1, 13):

        varname_optim = get_optim_varname(mth)
        long_name = "Optimal number of neighbors to use for monthly normal interpolation for month %d" % mth
        var_optim = stnda.add_stn_variable(
            varname_optim,
            long_name,
            "",
            'f8',
            fill_value=netCDF4.default_fillvals['f8'])
        vars_optim[mth] = var_optim

    # Only stations with a finite climate division value define divisions.
    divs = np.unique(climdiv_stns[np.isfinite(climdiv_stns)])

    stchk = StatusCheck(divs.size, 10)

    for clim_div in divs:

        fpath = os.path.join(
            path_xval_ds,
            "optim_nstns_%s_climdiv%d.nc" % (stnda.var_name, clim_div))

        # Read both arrays fully into memory and always release the file
        # handle, so looping over many divisions does not leak open datasets.
        ds_climdiv = Dataset(fpath)
        try:
            mae_climdiv = ds_climdiv.variables['mae'][:]
            nnghs_climdiv = ds_climdiv.variables['min_nghs'][:]
        finally:
            ds_climdiv.close()

        climdiv_mask = np.nonzero(climdiv_stns == clim_div)[0]

        for mth in np.arange(1, 13):

            # First axis of 'mae' is the month. In the remaining 2-D slice,
            # rows line up with the entries of 'min_nghs' and the second
            # axis is averaged (presumably over stations - TODO confirm).
            mae_climdiv_mth = mae_climdiv[mth - 1, :, :]
            mmae = np.mean(mae_climdiv_mth, axis=1)
            min_idx = np.argmin(mmae)
            vars_optim[mth][climdiv_mask] = nnghs_climdiv[min_idx]

        stchk.increment()

    # Flush the newly written station variables to the database file.
    stnda.ds.sync()
Esempio n. 2
0
    def __get_nnghs(self, pt, mth, stns_rm=None):
        '''
        Determine the number of neighboring stations to use for a point
        and month as the weighted average of the optimal neighbor counts
        stored on the stations surrounding the point.

        Parameters
        ----------
        pt : structured array
            The point; its LAT and LON fields are used to select
            the initial set of neighboring stations.
        mth : int
            The month whose optimal-neighbor variable
            (see get_optim_varname) should be used.
        stns_rm : optional
            Passed through to the station selector's set_ngh_stns.

        Returns
        ----------
        nnghs : int
            The rounded weighted average of the neighbors' optimal
            neighbor counts.

        Raises
        ----------
        Exception
            If none of the initial neighbors has a finite optimal
            neighbor count for the month.
        '''

        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   DFLT_INIT_NNGHS,
                                   load_obs=False,
                                   stns_rm=stns_rm)

        optim_varname = get_optim_varname(mth)

        # Only neighbors with a finite optimal count can contribute.
        indomain_mask = np.isfinite(self.stn_slct.ngh_stns[optim_varname])
        domain_stns = self.stn_slct.ngh_stns[indomain_mask]

        if domain_stns.size == 0:
            raise Exception(
                "Cannot determine the optimal # of neighbors to use!")

        n_wgt = self.stn_slct.ngh_wgt[indomain_mask]

        # np.int was only an alias of the builtin int and has been removed
        # from NumPy (>= 1.24); the builtin gives the same result.
        nnghs = int(
            np.round(
                np.average(domain_stns[optim_varname],
                           weights=n_wgt)))

        return nnghs
Esempio n. 3
0
def _get_rgn_nnghs_dict(stns):
    '''
    Build a nested lookup of neighbor counts keyed by region and month.

    Parameters
    ----------
    stns : structured array
        Station records holding a CLIMDIV field plus the monthly
        fields named by get_optim_varname and get_optim_anom_varname.

    Returns
    ----------
    dict
        Maps each finite, unique CLIMDIV value to a dict that maps
        month (1-12) to a 2-tuple: the values of the get_optim_varname
        and get_optim_anom_varname fields taken from the first station
        found in that region.
    '''
    climdivs = stns[CLIMDIV]
    months = np.arange(1, 13)

    nnghs_by_rgn = {}

    # Stations without a finite climate division do not define a region.
    for rgn in np.unique(climdivs[np.isfinite(climdivs)]):

        in_rgn = climdivs == rgn

        # The first station of the region supplies the values for each month.
        nnghs_by_rgn[rgn] = {
            mth: (stns[get_optim_varname(mth)][in_rgn][0],
                  stns[get_optim_anom_varname(mth)][in_rgn][0])
            for mth in months
        }

    return nnghs_by_rgn
Esempio n. 4
0
def build_empty_pt():
    '''
    Create an uninitialized point record.

    The record has float64 fields for LON, LAT, ELEV, TDI, CLIMDIV and
    MASK, followed by twelve monthly fields for each of: "tminMM",
    "tmaxMM", and the names produced by get_norm_varname,
    get_optim_varname, get_lst_varname and get_optim_anom_varname.

    Returns
    ----------
    a_pt : numpy record
        The single element of a length-1 structured array created with
        np.empty, so field values are arbitrary until they are assigned.
    '''
    months = np.arange(1, 13)

    # Builders for the per-month field names, in field layout order.
    mthly_namers = (
        lambda mth: "tmin%02d" % mth,
        lambda mth: "tmax%02d" % mth,
        get_norm_varname,
        get_optim_varname,
        get_lst_varname,
        get_optim_anom_varname,
    )

    fields = [(name, np.float64)
              for name in (LON, LAT, ELEV, TDI, CLIMDIV, MASK)]

    for namer in mthly_namers:
        fields.extend((namer(mth), np.float64) for mth in months)

    return np.empty(1, dtype=fields)[0]
Esempio n. 5
0
    def get_krig_params(self, pt, mth, rm_stnid=None):
        '''
        Get the moving window regression kriging variogram
        parameters for a specific point and month. Currently
        assumes an exponential variogram

        Parameters
        ----------
        pt : structured array
            A structured array containing the point's latitude, longitude,
            elevation, topographic dissection index, and average land skin
            temperatures for each month. An empty point can be initialized
            with build_empty_pt()
        mth : int
            The specific month as an integer (1-12)
        rm_stnid : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point.
            Forwarded to the station selector as its stns_rm argument.

        Returns
        ----------
        nug : float
            Exponential variogram nugget.
        psill : float
            Exponential variogram partial sill.
        rng : float
            Exponential variogram range.

        Raises
        ----------
        Exception
            If none of the initial neighbors has a finite optimal
            neighbor count for the month.
        '''

        # First determine the nnghs to use based on smoothed weighted average of
        # the optimal nnghs bandwidth at each station point.
        # rm_stnid was previously accepted but never applied; pass it on so
        # the excluded stations are actually left out of the neighbor search.
        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   DFLT_INIT_NNGHS,
                                   load_obs=False,
                                   stns_rm=rm_stnid)

        optim_varname = get_optim_varname(mth)

        # Only neighbors with a finite optimal count can contribute.
        indomain_mask = np.isfinite(self.stn_slct.ngh_stns[optim_varname])

        domain_stns = self.stn_slct.ngh_stns[indomain_mask]

        if domain_stns.size == 0:
            raise Exception(
                "Cannot determine the optimal # of neighbors to use!")

        n_wgt = self.stn_slct.ngh_wgt[indomain_mask]

        # np.int was only an alias of the builtin int and has been removed
        # from NumPy (>= 1.24); the builtin gives the same result.
        nnghs = int(
            np.round(
                np.average(domain_stns[optim_varname],
                           weights=n_wgt)))

        # Now use the optimal nnghs to get the krig params for this mth
        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=False,
                                   stns_rm=rm_stnid)

        # Hand the neighbor attributes to the R fitting function as R
        # numeric vectors.
        nghs = self.stn_slct.ngh_stns
        ngh_lon = ri.FloatSexpVector(nghs[LON])
        ngh_lat = ri.FloatSexpVector(nghs[LAT])
        ngh_elev = ri.FloatSexpVector(nghs[ELEV])
        ngh_tdi = ri.FloatSexpVector(nghs[TDI])
        ngh_lst = ri.FloatSexpVector(nghs[get_lst_varname(mth)])
        ngh_tair = ri.FloatSexpVector(nghs[get_norm_varname(mth)])
        ngh_wgt = ri.FloatSexpVector(self.stn_slct.ngh_wgt)
        ngh_dists = ri.FloatSexpVector(self.stn_slct.ngh_dists)

        rslt = self.r_func(ngh_lon, ngh_lat, ngh_elev, ngh_tdi, ngh_lst,
                           ngh_tair, ngh_wgt, ngh_dists)

        # The first three elements of the result are nugget, partial sill
        # and range, in that order.
        nug = rslt[0]
        psill = rslt[1]
        rng = rslt[2]

        return nug, psill, rng
Esempio n. 6
0
    def interp_pt(self, fix_invalid=True, stns_rm=None):
        '''
        Interpolate daily and monthly normal Tmin and Tmax values
        for the current point held in self.a_pt.

        Before each variable is interpolated, the point's monthly lst
        fields are filled from its "tminMM" / "tmaxMM" fields and its
        optimal neighbor fields are filled from the variable's
        per-climate-division parameter lookup.

        Parameters
        ----------
        fix_invalid : boolean, optional
            If True, pass the daily series through tmin_tmax_fixer and,
            when it reports fixed days, recompute the monthly normals
            from the fixed daily values. Default: True.
            NOTE(review): the previous wording described the invalid
            case as "Tmax > Tmin"; presumably the intended condition is
            Tmin not being below Tmax - confirm against tmin_tmax_fixer.
        stns_rm : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point.

        Returns
        ----------
        tmin_dly : ndarray
            Daily interpolated Tmin
        tmax_dly : ndarray
            Daily interpolated Tmax
        tmin_norms : ndarray
            Interpolated monthly Tmin normals
        tmax_norms : ndarray
            Interpolated monthly Tmax normals
        tmin_se : ndarray
            Kriging standard error for monthly Tmin normals
        tmax_se : ndarray
            Kriging standard error for monthly Tmax normals
        ninvalid : int
            The number of days reported as fixed by tmin_tmax_fixer.
            If fix_invalid is False, will be set to 0.
        '''

        def load_mth_params(obs_fmt, nnghparams):
            # Copy the variable's monthly values into the lst fields and set
            # the optim nnghs for the point's climate division.
            for mth in np.arange(1, 13):
                self.a_pt[get_lst_varname(mth)] = self.a_pt[obs_fmt % mth]
                optim, optim_anom = nnghparams[self.a_pt[CLIMDIV]][mth]
                self.a_pt[get_optim_varname(mth)] = optim
                self.a_pt[get_optim_anom_varname(mth)] = optim_anom

        def recalc_norms(dly):
            # Daily values -> mean per entry of yrMthsMasks -> mean per
            # entry of mth_masks, restricted to the days in daysNormMask.
            dly_norm = np.take(dly, self.daysNormMask)
            mthly = np.array([
                np.mean(np.take(dly_norm, amask))
                for amask in self.yrMthsMasks
            ])
            return np.array([
                np.mean(np.take(mthly, amask)) for amask in self.mth_masks
            ])

        # Tmin pass
        load_mth_params("tmin%02d", self.nnghparams_tmin)
        tmin_dly, tmin_norms, tmin_se = self.interp_tmin.interp(
            self.a_pt, stns_rm=stns_rm)

        # Tmax pass
        load_mth_params("tmax%02d", self.nnghparams_tmax)
        tmax_dly, tmax_norms, tmax_se = self.interp_tmax.interp(
            self.a_pt, stns_rm=stns_rm)

        ninvalid = 0

        if fix_invalid:

            tmin_dly, tmax_dly, ninvalid = tmin_tmax_fixer(tmin_dly, tmax_dly)

            # The normals only need refreshing if daily values changed.
            if ninvalid > 0:
                tmin_norms = recalc_norms(tmin_dly)
                tmax_norms = recalc_norms(tmax_dly)

        return (tmin_dly, tmax_dly, tmin_norms, tmax_norms, tmin_se, tmax_se,
                ninvalid)