Example #1
    def __init__(self, mean, cov, df=None, random_state=1):
        self.mean = mean
        self.cov = cov
        self.sd = sd = np.sqrt(np.diag(cov))
        if df is None:
            self.dist = stats.multivariate_normal(mean=mean, cov=cov)
            self.udist = stats.norm(loc=mean, scale=sd)
            self.std_udist = stats.norm(loc=0., scale=1.)
        else:
            sigma = cov * (df - 2) / df
            self.dist = MVT(mean=mean, sigma=sigma, df=df)
            self.udist = stats.t(loc=mean, scale=sd, df=df)
            self.std_udist = stats.t(loc=0., scale=1., df=df)
        self.dist.random_state = random_state
        self.udist.random_state = random_state
        self.std_udist.random_state = random_state

        self._chol = cholesky(self.cov)
        self._pchol = pivoted_cholesky(self.cov)

        e, v = np.linalg.eigh(self.cov)
        # To match Bastos and O'Hagan definition
        # i.e., eigenvalues ordered from largest to smallest
        e, v = e[::-1], v[:, ::-1]
        ee = np.diag(np.sqrt(e))
        self._eig = (v @ ee)
Example #2
    def train_analytic(self):
        """ Calculating the analytic distribution posteriors given on page 15 of Lori's 
        Optimal Classification eq 34. """
        self.nustar = self.nu + self.n

        samplemean = self.data.mean(axis=0)
        samplecov = np.cov(self.data.T)

        self.mustar = (self.nu * self.priormu + self.n * samplemean) \
                / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.S + (self.n-1)*samplecov + self.nu*self.n/(self.nu+self.n)\
                * np.outer((samplemean - self.priormu), (samplemean - self.priormu))
                
        # Now calculate effective class conditional densities from eq 55 page 21
        self.fx = MVT(
                self.mustar, 
                (self.nustar+1)/(self.kappastar-self.D+1)/self.nustar * self.Sstar, 
                self.kappastar - self.D + 1)
Example #3
class GaussianBayes(object):
    def __init__(self, priormu, nu, kappa, S, data, alpha=1, true_dist=None):
        """ Initialize Gaussian distribution with data and priors
        then calculate the analytic posterior """
        self.true_dist = true_dist

        self.data = data.copy()
        self.n = data.shape[0]

        self.priormu = np.asarray(priormu)
        self.nu = nu
        self.kappa = kappa
        self.S = np.asarray(S)
        self.alpha = alpha
        self.D = self.data.shape[1]

        self.train_analytic()

    def train_analytic(self):
        """ Calculating the analytic distribution posteriors given on page 15 of Lori's 
        Optimal Classification eq 34. """
        self.nustar = self.nu + self.n

        samplemean = self.data.mean(axis=0)
        samplecov = np.cov(self.data.T)

        self.mustar = (self.nu * self.priormu + self.n * samplemean) / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = (
            self.S
            + (self.n - 1) * samplecov
            + self.nu * self.n / (self.nu + self.n) * np.outer((samplemean - self.priormu), (samplemean - self.priormu))
        )

        # Now calculate effective class conditional densities from eq 55 page 21
        self.fx = MVT(
            self.mustar,
            (self.nustar + 1) / (self.kappastar - self.D + 1) / self.nustar * self.Sstar,
            self.kappastar - self.D + 1,
        )

    def eval_posterior(self, pts):
        return self.fx.logpdf(pts)
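
A minimal usage sketch for the GaussianBayes class above, assuming numpy is imported as np and that MVT is a multivariate t distribution taking (mean, sigma, df) as in the other examples on this page; the data and prior values below are made up for illustration:

import numpy as np

# Hypothetical 2-D data set; any (n, D)-shaped array works.
rng = np.random.default_rng(0)
data = rng.normal(size=(50, 2))

model = GaussianBayes(
    priormu=[0.0, 0.0],  # prior mean
    nu=1.0,              # confidence in the prior mean
    kappa=4.0,           # prior degrees of freedom; kappa + n - D + 1 must be positive
    S=np.eye(2),         # prior scatter matrix
    data=data,
)
# Log of the effective class-conditional density at two test points
print(model.eval_posterior(np.array([[0.0, 0.0], [1.0, 1.0]])))
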
Example #4
class GaussianBayes(object):
    def __init__(self, priormu, nu, kappa, S, data, alpha=1, true_dist=None):
        """ Initialize Gaussian distribution with data and priors
        then calculate the analytic posterior """
        self.true_dist = true_dist

        self.data = data.copy()
        self.n = data.shape[0]

        self.priormu = np.asarray(priormu)
        self.nu = nu
        self.kappa = kappa
        self.S = np.asarray(S)
        self.alpha = alpha
        self.D = self.data.shape[1]

        self.train_analytic()

    def train_analytic(self):
        """ Calculating the analytic distribution posteriors given on page 15 of Lori's 
        Optimal Classification eq 34. """
        self.nustar = self.nu + self.n

        samplemean = self.data.mean(axis=0)
        samplecov = np.cov(self.data.T)

        self.mustar = (self.nu * self.priormu + self.n * samplemean) \
                / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.S + (self.n-1)*samplecov + self.nu*self.n/(self.nu+self.n)\
                * np.outer((samplemean - self.priormu), (samplemean - self.priormu))
                
        # Now calculate effective class conditional densities from eq 55 page 21
        self.fx = MVT(
                self.mustar, 
                (self.nustar+1)/(self.kappastar-self.D+1)/self.nustar * self.Sstar, 
                self.kappastar - self.D + 1)

    def eval_posterior(self, pts):
        return self.fx.logpdf(pts)
Example #5
    def train_analytic(self):
        """ Calculating the analytic distribution posteriors given on page 15 of Lori's 
        Optimal Classification eq 34. """
        self.nustar = self.nu + self.n

        samplemean = self.data.mean(axis=0)
        samplecov = np.cov(self.data.T)

        self.mustar = (self.nu * self.priormu + self.n * samplemean) / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = (
            self.S
            + (self.n - 1) * samplecov
            + self.nu * self.n / (self.nu + self.n) * np.outer((samplemean - self.priormu), (samplemean - self.priormu))
        )

        # Now calculate effective class conditional densities from eq 55 page 21
        self.fx = MVT(
            self.mustar,
            (self.nustar + 1) / (self.kappastar - self.D + 1) / self.nustar * self.Sstar,
            self.kappastar - self.D + 1,
        )
Example #6
    def __init__(self):
        np.random.seed(1234)

        self.n = 4 # Data points

        self.true_mu = 0.0
        self.true_sigma = 1 #di.invgamma.rvs(3)

        # For G function calculation and averaging
        self.grid_n = 100
        low,high = -4, 4
        self.gextent = (low,high)
        self.grid = np.linspace(low,high,self.grid_n)
        self.gavg = np.zeros(self.grid_n)
        self.numgavg = 0

        #self.data = di.norm.rvs(size=self.n)
        self.data = np.array([0.0, -0.0, 0.5, -0.5])
        assert self.data.size == self.n
        
        ######## Starting point of MCMC Run #######
        self.mu = 0.0
        self.sigma = 2.0

        ###### Bookkeeping ######
        self.oldmu = None
        self.oldsigma = None

        ##### Prior Values and Confidences ######
        self.priorsigma = 2
        self.kappa = 1
        self.priormu = 0
        self.nu = 8.0
        #### Calculate the analytic solution given on page 15 of Lori's
        #### Optimal Classification, eq. 34.
        self.nustar = self.nu + self.n

        samplemean = self.data.mean()
        samplevar = np.cov(self.data)

        self.mustar = (self.nu*self.priormu + self.n * samplemean) \
                / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.priorsigma + (self.n-1)*samplevar + self.nu*self.n/(self.nu+self.n)\
                * (samplemean - self.priormu)**2
                
        #### Now calculate effective class conditional densities from eq 55
        #### page 21

        #self.fx = MVT(
                #self.mu0star, 
                #(self.nu0star+1)/(self.kappa0star-self.D+1)/self.nu0star * self.S0star, 
                #self.kappa0star - self.D + 1)
        # So I'm pretty sure this is incorrect below, off by some scaling
        # parameters
        self.fx = MVT(
                [self.mustar], 
                [(self.nustar+1)/(self.kappastar)/self.nustar * self.Sstar / 2],
                self.kappastar /2 )

        self.analyticfx = self.fx.logpdf(self.grid.reshape(-1,1))
Example #7
class Classification():
    def __init__(self):
        np.random.seed(1234)

        self.n = 4 # Data points

        self.true_mu = 0.0
        self.true_sigma = 1 #di.invgamma.rvs(3)

        # For G function calculation and averaging
        self.grid_n = 100
        low,high = -4, 4
        self.gextent = (low,high)
        self.grid = np.linspace(low,high,self.grid_n)
        self.gavg = np.zeros(self.grid_n)
        self.numgavg = 0

        #self.data = di.norm.rvs(size=self.n)
        self.data = np.array([0.0, -0.0, 0.5, -0.5])
        assert self.data.size == self.n
        
        ######## Starting point of MCMC Run #######
        self.mu = 0.0
        self.sigma = 2.0

        ###### Bookkeeping ######
        self.oldmu = None
        self.oldsigma = None

        ##### Prior Values and Confidences ######
        self.priorsigma = 2
        self.kappa = 1
        self.priormu = 0
        self.nu = 8.0
        #### Calculate the analytic solution given on page 15 of Lori's
        #### Optimal Classification, eq. 34.
        self.nustar = self.nu + self.n

        samplemean = self.data.mean()
        samplevar = np.cov(self.data)

        self.mustar = (self.nu*self.priormu + self.n * samplemean) \
                / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.priorsigma + (self.n-1)*samplevar + self.nu*self.n/(self.nu+self.n)\
                * (samplemean - self.priormu)**2
                
        #### Now calculate effective class conditional densities from eq 55
        #### page 21

        #self.fx = MVT(
                #self.mu0star, 
                #(self.nu0star+1)/(self.kappa0star-self.D+1)/self.nu0star * self.S0star, 
                #self.kappa0star - self.D + 1)
        # So I'm pretty sure this is incorrect below, off by some scaling
        # parameters
        self.fx = MVT(
                [self.mustar], 
                [(self.nustar+1)/(self.kappastar)/self.nustar * self.Sstar / 2],
                self.kappastar /2 )

        self.analyticfx = self.fx.logpdf(self.grid.reshape(-1,1))


    def propose(self):
        self.oldmu = self.mu
        self.oldsigma = self.sigma

        self.mu += np.random.randn()*0.1
        #self.mu = np.random.randn()
        self.sigma = di.invgamma.rvs(1)
        return 0

    def copy(self):
        return (self.mu, self.sigma, di.norm.rvs(loc=self.mu, scale=self.sigma))

    def reject(self):
        self.mu = self.oldmu
        self.sigma = self.oldsigma

    def energy(self):
        total = 0.0
        total -= di.norm.logpdf(self.data, loc=self.mu, scale=self.sigma).sum()
        # Now add in the priors...
        total -= log(self.sigma)*(-0.5) - self.nu/2 * (self.mu-self.priormu)**2/self.sigma
        total -= log(self.sigma)*(self.kappa+2)/(-2) - 0.5*self.priorsigma/self.sigma
        return total

    def calc_gfunc(self):
        return di.norm.pdf(self.grid, loc=self.mu, scale=self.sigma) 

    def init_db(self, db, dbsize):
        pass
        #dtype = [('thetas',np.double),
                #('energies',np.double),
                #('funcs',np.double)]
        #if db == None:
            #return np.zeros(dbsize, dtype=dtype)
        #elif db.shape[0] != dbsize:
            #return np.resize(db, dbsize)
        #else:
            #raise Exception("DB Not inited")

    def save_to_db(self, db, theta, energy, iteration):
        #func = 0.0
        #db[iteration] = np.array([theta, energy, func])
        global mydb
        mydb.append(self.copy())

        # Update G function average
        self.numgavg += 1
        self.gavg += (self.calc_gfunc() - self.gavg) / self.numgavg
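
The Classification class above only defines the sampler interface (propose, energy, reject); no driver loop appears in these snippets. A hypothetical Metropolis-style driver is sketched below, assuming energy() returns a negative log posterior and ignoring the asymmetry of the invgamma proposal for sigma, so this is a rough illustration of how the interface fits together rather than a correct MCMC scheme for this model:

import numpy as np

def run_chain(chain, iters=1000):
    # Minimal Metropolis loop over an object exposing propose/energy/reject.
    old_e = chain.energy()
    for _ in range(iters):
        chain.propose()
        new_e = chain.energy()
        # Accept with probability min(1, exp(old_e - new_e)); lower energy is better.
        if np.log(np.random.rand()) < old_e - new_e:
            old_e = new_e    # accept: keep the proposed (mu, sigma)
        else:
            chain.reject()   # reject: restore the saved state

# e.g. run_chain(Classification(), iters=500)
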
Example #8
    def test_mvt_pdf(self):
        cov3 = self.cov3
        mu3 = self.mu3

        mvt = MVT((0, 0), 1, 5)
        assert_almost_equal(mvt.logpdf(np.array([0., 0.])),
                            -1.837877066409345,
                            decimal=15)
        assert_almost_equal(mvt.pdf(np.array([0., 0.])),
                            0.1591549430918953,
                            decimal=15)

        assert_almost_equal(mvt.logpdf(np.array([1., 1.])),
                            -3.01552989458359,
                            decimal=14)

        mvt1 = MVT((0, 0), 1, 1)
        assert_almost_equal(mvt1.logpdf(np.array([1., 1.])),
                            -3.48579549941151,
                            decimal=14)

        rvs = mvt.rvs(100000)
        assert_almost_equal(np.cov(rvs, rowvar=0), mvt.cov, decimal=1)

        mvt31 = MVT(mu3, cov3, 1)
        assert_almost_equal(
            mvt31.pdf(cov3),
            [0.0007276818698165781, 0.0009980625182293658, 0.0027661422056214652],
            decimal=17)

        mvt = MVT(mu3, cov3, 3)
        assert_almost_equal(
            mvt.pdf(cov3),
            [0.000863777424247410, 0.001277510788307594, 0.004156314279452241],
            decimal=17)
Example #9
    def __init__(self):
        np.random.seed(1234)

        self.n = 4  # Data points

        self.true_mu = 0.0
        self.true_sigma = 1  #di.invgamma.rvs(3)

        # For G function calculation and averaging
        self.grid_n = 100
        low, high = -4, 4
        self.gextent = (low, high)
        self.grid = np.linspace(low, high, self.grid_n)
        self.gavg = np.zeros(self.grid_n)
        self.numgavg = 0

        #self.data = di.norm.rvs(size=self.n)
        self.data = np.array([0.0, -0.0, 0.5, -0.5])
        assert self.data.size == self.n

        ######## Starting point of MCMC Run #######
        self.mu = 0.0
        self.sigma = 2.0

        ###### Bookkeeping ######
        self.oldmu = None
        self.oldsigma = None

        ##### Prior Values and Confidences ######
        self.priorsigma = 2
        self.kappa = 1
        self.priormu = 0
        self.nu = 8.0
        #### Calculate the analytic solution given on page 15 of Lori's
        #### Optimal Classification, eq. 34.
        self.nustar = self.nu + self.n

        samplemean = self.data.mean()
        samplevar = np.cov(self.data)

        self.mustar = (self.nu*self.priormu + self.n * samplemean) \
                / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.priorsigma + (self.n-1)*samplevar + self.nu*self.n/(self.nu+self.n)\
                * (samplemean - self.priormu)**2

        #### Now calculate effective class conditional densities from eq 55
        #### page 21

        #self.fx = MVT(
        #self.mu0star,
        #(self.nu0star+1)/(self.kappa0star-self.D+1)/self.nu0star * self.S0star,
        #self.kappa0star - self.D + 1)
        # So I'm pretty sure this is incorrect below, off by some scaling
        # parameters
        self.fx = MVT([self.mustar],
                      [(self.nustar + 1) /
                       (self.kappastar) / self.nustar * self.Sstar / 2],
                      self.kappastar / 2)

        self.analyticfx = self.fx.logpdf(self.grid.reshape(-1, 1))
Example #10
class Classification():
    def __init__(self):
        np.random.seed(1234)

        self.n = 4  # Data points

        self.true_mu = 0.0
        self.true_sigma = 1  #di.invgamma.rvs(3)

        # For G function calculation and averaging
        self.grid_n = 100
        low, high = -4, 4
        self.gextent = (low, high)
        self.grid = np.linspace(low, high, self.grid_n)
        self.gavg = np.zeros(self.grid_n)
        self.numgavg = 0

        #self.data = di.norm.rvs(size=self.n)
        self.data = np.array([0.0, -0.0, 0.5, -0.5])
        assert self.data.size == self.n

        ######## Starting point of MCMC Run #######
        self.mu = 0.0
        self.sigma = 2.0

        ###### Bookkeeping ######
        self.oldmu = None
        self.oldsigma = None

        ##### Prior Values and Confidences ######
        self.priorsigma = 2
        self.kappa = 1
        self.priormu = 0
        self.nu = 8.0
        #### Calculate the analytic solution given on page 15 of Lori's
        #### Optimal Classification, eq. 34.
        self.nustar = self.nu + self.n

        samplemean = self.data.mean()
        samplevar = np.cov(self.data)

        self.mustar = (self.nu*self.priormu + self.n * samplemean) \
                / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.priorsigma + (self.n-1)*samplevar + self.nu*self.n/(self.nu+self.n)\
                * (samplemean - self.priormu)**2

        #### Now calculate effective class conditional densities from eq 55
        #### page 21

        #self.fx = MVT(
        #self.mu0star,
        #(self.nu0star+1)/(self.kappa0star-self.D+1)/self.nu0star * self.S0star,
        #self.kappa0star - self.D + 1)
        # So I'm pretty sure this is incorrect below, off by some scaling
        # parameters
        self.fx = MVT([self.mustar],
                      [(self.nustar + 1) /
                       (self.kappastar) / self.nustar * self.Sstar / 2],
                      self.kappastar / 2)

        self.analyticfx = self.fx.logpdf(self.grid.reshape(-1, 1))

    def propose(self):
        self.oldmu = self.mu
        self.oldsigma = self.sigma

        self.mu += np.random.randn() * 0.1
        #self.mu = np.random.randn()
        self.sigma = di.invgamma.rvs(1)
        return 0

    def copy(self):
        return (self.mu, self.sigma, di.norm.rvs(loc=self.mu,
                                                 scale=self.sigma))

    def reject(self):
        self.mu = self.oldmu
        self.sigma = self.oldsigma

    def energy(self):
        total = 0.0
        total -= di.norm.logpdf(self.data, loc=self.mu, scale=self.sigma).sum()
        # Now add in the priors...
        total -= log(self.sigma) * (-0.5) - self.nu / 2 * (
            self.mu - self.priormu)**2 / self.sigma
        total -= log(self.sigma) * (self.kappa + 2) / (
            -2) - 0.5 * self.priorsigma / self.sigma
        return total

    def calc_gfunc(self):
        return di.norm.pdf(self.grid, loc=self.mu, scale=self.sigma)

    def init_db(self, db, dbsize):
        pass
        #dtype = [('thetas',np.double),
        #('energies',np.double),
        #('funcs',np.double)]
        #if db == None:
        #return np.zeros(dbsize, dtype=dtype)
        #elif db.shape[0] != dbsize:
        #return np.resize(db, dbsize)
        #else:
        #raise Exception("DB Not inited")

    def save_to_db(self, db, theta, energy, iteration):
        #func = 0.0
        #db[iteration] = np.array([theta, energy, func])
        global mydb
        mydb.append(self.copy())

        # Update G function average
        self.numgavg += 1
        self.gavg += (self.calc_gfunc() - self.gavg) / self.numgavg
Example #11
    def test_mvt_pdf(self):
        cov3 = self.cov3
        mu3 = self.mu3

        mvt = MVT((0,0), 1, 5)
        assert_almost_equal(mvt.logpdf(np.array([0.,0.])), -1.837877066409345,
                            decimal=15)
        assert_almost_equal(mvt.pdf(np.array([0.,0.])), 0.1591549430918953,
                            decimal=15)

        assert_almost_equal(mvt.logpdf(np.array([1.,1.])), -3.01552989458359,
                            decimal=14)

        mvt1 = MVT((0,0), 1, 1)
        assert_almost_equal(mvt1.logpdf(np.array([1.,1.])), -3.48579549941151,
                            decimal=14)

        rvs = mvt.rvs(100000)
        assert_almost_equal(np.cov(rvs, rowvar=0), mvt.cov, decimal=1)

        mvt31 = MVT(mu3, cov3, 1)
        assert_almost_equal(mvt31.pdf(cov3),
            [0.0007276818698165781, 0.0009980625182293658, 0.0027661422056214652],
            decimal=17)

        mvt = MVT(mu3, cov3, 3)
        assert_almost_equal(mvt.pdf(cov3),
            [0.000863777424247410, 0.001277510788307594, 0.004156314279452241],
            decimal=17)
Example #12
class Diagnostic:
    R"""A class for quickly testing model checking methods discussed in Bastos & O'Hagan.

    """
    def __init__(self, mean, cov, df=None, random_state=1):
        self.mean = mean
        self.cov = cov
        self.sd = sd = np.sqrt(np.diag(cov))
        if df is None:
            self.dist = stats.multivariate_normal(mean=mean, cov=cov)
            self.udist = stats.norm(loc=mean, scale=sd)
            self.std_udist = stats.norm(loc=0., scale=1.)
        else:
            sigma = cov * (df - 2) / df
            self.dist = MVT(mean=mean, sigma=sigma, df=df)
            self.udist = stats.t(loc=mean, scale=sd, df=df)
            self.std_udist = stats.t(loc=0., scale=1., df=df)
        self.dist.random_state = random_state
        self.udist.random_state = random_state
        self.std_udist.random_state = random_state

        self._chol = cholesky(self.cov)
        self._pchol = pivoted_cholesky(self.cov)

        e, v = np.linalg.eigh(self.cov)
        # To match Bastos and O'Hagan definition
        # i.e., eigenvalues ordered from largest to smallest
        e, v = e[::-1], v[:, ::-1]
        ee = np.diag(np.sqrt(e))
        self._eig = (v @ ee)
        # self._eig = ee @ v

    def samples(self, n):
        return self.dist.rvs(n).T

    def individual_errors(self, y):
        R"""Computes the scaled individual errors diagnostic

        .. math::
            D_I(y) = \frac{y-m}{\sigma}

        Parameters
        ----------
        y : array, shape = (n_samples, n_curves)

        Returns
        -------
        array : shape = (n_samples, n_curves)
        """
        return ((y.T - self.mean) / np.sqrt(np.diag(self.cov))).T

    def cholesky_errors(self, y):
        return cholesky_errors(y.T, self.mean, self._chol).T

    def pivoted_cholesky_errors(self, y):
        return solve(self._pchol, (y.T - self.mean).T)

    def eigen_errors(self, y):
        return solve(self._eig, (y.T - self.mean).T)

    def chi2(self, y):
        R"""The chi-squared diagnostic: the sum of squared individual errors."""
        return np.sum(self.individual_errors(y)**2, axis=0)

    def md_squared(self, y):
        R"""The squared Mahalanobis distance"""
        return mahalanobis(y.T, self.mean, self._chol)**2

    def kl(self, mean, cov):
        R"""The Kullback-Leibler divergence"""
        m1, c1, chol1 = self.mean, self.cov, self._chol
        m0, c0 = mean, cov
        tr = np.trace(cho_solve((chol1, True), c0))
        dist = self.md_squared(m0)
        k = c1.shape[-1]
        # log det(c1) via its Cholesky factor, minus log det(c0)
        logs = 2 * np.sum(np.log(np.diag(chol1))) - np.linalg.slogdet(c0)[-1]
        return 0.5 * (tr + dist - k + logs)

    def credible_interval(self, y, intervals):
        """The credible interval diagnostic.

        Parameters
        ----------
        y : (n_c, d) shaped array
        intervals : 1d array
            The credible intervals at which to perform the test
        """
        lower, upper = self.udist.interval(np.atleast_2d(intervals).T)

        def diagnostic(data_, lower_, upper_):
            indicator = (lower_ < data_) & (data_ < upper_)  # 1 if in, 0 if out
            return np.average(indicator, axis=1)  # The diagnostic

        dci = np.apply_along_axis(diagnostic,
                                  axis=1,
                                  arr=np.atleast_2d(y).T,
                                  lower_=lower,
                                  upper_=upper)
        dci = np.squeeze(dci)
        return dci

    @staticmethod
    def variogram(X, y, bin_bounds):
        v = VariogramFourthRoot(X, y, bin_bounds)
        bin_locations = v.bin_locations
        gamma, lower, upper = v.compute(rt_scale=False)
        return v, bin_locations, gamma, lower, upper
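
A short usage sketch for the Diagnostic class, assuming numpy is available as np and that the helpers it references (cholesky, pivoted_cholesky, cholesky_errors, mahalanobis, cho_solve, solve, VariogramFourthRoot) are importable; the squared-exponential covariance below is invented for illustration:

import numpy as np

# Illustrative mean and covariance on a 10-point grid.
x = np.linspace(0, 1, 10)
mean = np.zeros_like(x)
cov = np.exp(-0.5 * (x[:, None] - x[None, :])**2 / 0.2**2) + 1e-8 * np.eye(10)

d = Diagnostic(mean, cov, random_state=3)
y = d.samples(5)             # shape (n_samples, n_curves) = (10, 5)
md = d.md_squared(y)         # squared Mahalanobis distance, one value per curve
errs = d.cholesky_errors(y)  # decorrelated errors; should look like iid standard normals
ci = d.credible_interval(y.T, np.array([0.68, 0.95]))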