class GaussianBayes(object):
    def __init__(self, priormu, nu, kappa, S, data, alpha=1, true_dist=None):
        """Initialize Gaussian distribution with data and priors, then
        calculate the analytic posterior."""
        self.true_dist = true_dist
        self.data = data.copy()
        self.n = data.shape[0]
        self.priormu = np.asarray(priormu)
        self.nu = nu
        self.kappa = kappa
        self.S = np.asarray(S)
        self.alpha = alpha
        self.D = self.data.shape[1]
        self.train_analytic()

    def train_analytic(self):
        """Calculate the analytic distribution posteriors given on page 15 of
        Lori's Optimal Classification, eq. 34."""
        self.nustar = self.nu + self.n
        samplemean = self.data.mean(axis=0)
        samplecov = np.cov(self.data.T)
        self.mustar = (self.nu * self.priormu + self.n * samplemean) / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = (
            self.S
            + (self.n - 1) * samplecov
            + self.nu * self.n / (self.nu + self.n)
            * np.outer(samplemean - self.priormu, samplemean - self.priormu)
        )
        # Now calculate effective class conditional densities from eq. 55, page 21
        self.fx = MVT(
            self.mustar,
            (self.nustar + 1) / (self.kappastar - self.D + 1) / self.nustar * self.Sstar,
            self.kappastar - self.D + 1,
        )

    def eval_posterior(self, pts):
        return self.fx.logpdf(pts)
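# Sketch only: a minimal usage example for GaussianBayes. Assumes numpy is
# imported as np and that MVT(mean, sigma, df) is a multivariate Student-t
# class (e.g. statsmodels.sandbox.distributions.mv_normal.MVT); the data and
# prior values below are purely illustrative, not from the original code.
import numpy as np

rng = np.random.default_rng(0)
D = 2
data = rng.normal(size=(50, D))      # (n_samples, n_features) synthetic data

model = GaussianBayes(
    priormu=np.zeros(D),             # prior mean
    nu=1.0,                          # confidence in the prior mean
    kappa=D + 2,                     # prior degrees of freedom
    S=np.eye(D),                     # prior scatter matrix
    data=data,
)

# Log of the effective class-conditional (posterior predictive) density at new points
pts = np.array([[0.0, 0.0], [1.0, -1.0]])
print(model.eval_posterior(pts))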
class Classification():
    def __init__(self):
        np.random.seed(1234)
        self.n = 4  # Data points
        self.true_mu = 0.0
        self.true_sigma = 1  # di.invgamma.rvs(3)

        # For G function calculation and averaging
        self.grid_n = 100
        low, high = -4, 4
        self.gextent = (low, high)
        self.grid = np.linspace(low, high, self.grid_n)
        self.gavg = np.zeros(self.grid_n)
        self.numgavg = 0

        # self.data = di.norm.rvs(size=self.n)
        self.data = np.array([0.0, -0.0, 0.5, -0.5])
        assert self.data.size == self.n

        ######## Starting point of MCMC run ########
        self.mu = 0.0
        self.sigma = 2.0

        ###### Bookkeeping ######
        self.oldmu = None
        self.oldsigma = None

        ##### Prior values and confidences ######
        self.priorsigma = 2
        self.kappa = 1
        self.priormu = 0
        self.nu = 8.0

        # Calculate the analytic solution given on page 15 of Lori's
        # Optimal Classification, eq. 34.
        self.nustar = self.nu + self.n
        samplemean = self.data.mean()
        samplevar = np.cov(self.data)
        self.mustar = (self.nu * self.priormu + self.n * samplemean) \
            / (self.nu + self.n)
        self.kappastar = self.kappa + self.n
        self.Sstar = self.priorsigma + (self.n - 1) * samplevar \
            + self.nu * self.n / (self.nu + self.n) * (samplemean - self.priormu)**2

        # Now calculate the effective class-conditional density from eq. 55,
        # page 21.
        # self.fx = MVT(
        #     self.mu0star,
        #     (self.nu0star+1)/(self.kappa0star-self.D+1)/self.nu0star * self.S0star,
        #     self.kappa0star - self.D + 1)
        # So I'm pretty sure this is incorrect below, off by some scaling
        # parameters
        self.fx = MVT(
            [self.mustar],
            [(self.nustar + 1) / self.kappastar / self.nustar * self.Sstar / 2],
            self.kappastar / 2)
        self.analyticfx = self.fx.logpdf(self.grid.reshape(-1, 1))

    def propose(self):
        self.oldmu = self.mu
        self.oldsigma = self.sigma
        self.mu += np.random.randn() * 0.1
        # self.mu = np.random.randn()
        self.sigma = di.invgamma.rvs(1)
        return 0

    def copy(self):
        return (self.mu, self.sigma, di.norm.rvs(loc=self.mu, scale=self.sigma))

    def reject(self):
        self.mu = self.oldmu
        self.sigma = self.oldsigma

    def energy(self):
        sum = 0.0
        sum -= di.norm.logpdf(self.data, loc=self.mu, scale=self.sigma).sum()
        # Now add in the priors...
        sum -= log(self.sigma) * (-0.5) - self.nu / 2 * (self.mu - self.priormu)**2 / self.sigma
        sum -= log(self.sigma) * (self.kappa + 2) / (-2) - 0.5 * self.priorsigma / self.sigma
        return sum

    def calc_gfunc(self):
        return di.norm.pdf(self.grid, loc=self.mu, scale=self.sigma)

    def init_db(self, db, dbsize):
        pass
        # dtype = [('thetas', np.double),
        #          ('energies', np.double),
        #          ('funcs', np.double)]
        # if db == None:
        #     return np.zeros(dbsize, dtype=dtype)
        # elif db.shape[0] != dbsize:
        #     return np.resize(db, dbsize)
        # else:
        #     raise Exception("DB Not inited")

    def save_to_db(self, db, theta, energy, iteration):
        # func = 0.0
        # db[iteration] = np.array([theta, energy, func])
        global mydb
        mydb.append(self.copy())
        # Update G function average
        self.numgavg += 1
        self.gavg += (self.calc_gfunc() - self.gavg) / self.numgavg
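# Sketch only: a minimal random-walk driver showing how the propose/energy/
# reject/save_to_db interface above appears intended to be used. The accept
# rule, iteration count, and module-level `mydb` list are assumptions, not
# part of the original code. Assumes numpy as np, scipy.stats.distributions
# as di, and `log` are available, as the class itself requires. Note that
# propose() draws sigma from an inverse-gamma, so an exact sampler would also
# include the Metropolis-Hastings proposal ratio; it is omitted here.
import numpy as np

mydb = []   # save_to_db() appends (mu, sigma, predictive draw) tuples here

def run_chain(model, iters=1000):
    current_energy = model.energy()
    for i in range(iters):
        model.propose()                      # perturb (mu, sigma) in place
        new_energy = model.energy()
        # Accept with probability exp(old_energy - new_energy), else roll back
        if np.log(np.random.rand()) < current_energy - new_energy:
            current_energy = new_energy
        else:
            model.reject()
        model.save_to_db(None, (model.mu, model.sigma), current_energy, i)
    return mydb

# chain = run_chain(Classification(), iters=5000)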
def test_mvt_pdf(self):
    cov3 = self.cov3
    mu3 = self.mu3

    mvt = MVT((0, 0), 1, 5)
    assert_almost_equal(mvt.logpdf(np.array([0., 0.])), -1.837877066409345,
                        decimal=15)
    assert_almost_equal(mvt.pdf(np.array([0., 0.])), 0.1591549430918953,
                        decimal=15)

    mvt.logpdf(np.array([1., 1.])) - (-3.01552989458359)

    mvt1 = MVT((0, 0), 1, 1)
    mvt1.logpdf(np.array([1., 1.])) - (-3.48579549941151)
    # decimal=16

    rvs = mvt.rvs(100000)
    assert_almost_equal(np.cov(rvs, rowvar=0), mvt.cov, decimal=1)

    mvt31 = MVT(mu3, cov3, 1)
    assert_almost_equal(mvt31.pdf(cov3),
                        [0.0007276818698165781, 0.0009980625182293658,
                         0.0027661422056214652],
                        decimal=17)

    mvt = MVT(mu3, cov3, 3)
    assert_almost_equal(mvt.pdf(cov3),
                        [0.000863777424247410, 0.001277510788307594,
                         0.004156314279452241],
                        decimal=17)
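# Sketch only (not part of the test suite): the reference values above are
# consistent with MVT(mean, sigma, df) treating sigma as the scale matrix,
# which can be cross-checked against scipy.stats.multivariate_t (SciPy >= 1.6).
import numpy as np
from scipy import stats

ref = stats.multivariate_t(loc=np.zeros(2), shape=np.eye(2), df=5)
print(ref.logpdf(np.zeros(2)))   # ~ -1.837877066409345, as asserted above
print(ref.pdf(np.zeros(2)))      # ~ 0.1591549430918953 = 1 / (2*pi)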
class Diagnostic: R"""A class for quickly testing model checking methods discussed in Bastos & O'Hagan. """ def __init__(self, mean, cov, df=None, random_state=1): self.mean = mean self.cov = cov self.sd = sd = np.sqrt(np.diag(cov)) if df is None: self.dist = stats.multivariate_normal(mean=mean, cov=cov) self.udist = stats.norm(loc=mean, scale=sd) self.std_udist = stats.norm(loc=0., scale=1.) else: sigma = cov * (df - 2) / df self.dist = MVT(mean=mean, sigma=sigma, df=df) self.udist = stats.t(loc=mean, scale=sd, df=df) self.std_udist = stats.t(loc=0., scale=1., df=df) self.dist.random_state = random_state self.udist.random_state = random_state self.std_udist.random_state = random_state self._chol = cholesky(self.cov) self._pchol = pivoted_cholesky(self.cov) e, v = np.linalg.eigh(self.cov) # To match Bastos and O'Hagan definition # i.e., eigenvalues ordered from largest to smallest e, v = e[::-1], v[:, ::-1] ee = np.diag(np.sqrt(e)) self._eig = (v @ ee) # self._eig = ee @ v def samples(self, n): return self.dist.rvs(n).T def individual_errors(self, y): R"""Computes the scaled individual errors diagnostic .. math:: D_I(y) = \frac{y-m}{\sigma} Parameters ---------- y : array, shape = (n_samples, n_curves) Returns ------- array : shape = (n_samples, n_curves) """ return ((y.T - self.mean) / np.sqrt(np.diag(self.cov))).T def cholesky_errors(self, y): return cholesky_errors(y.T, self.mean, self._chol).T def pivoted_cholesky_errors(self, y): return solve(self._pchol, (y.T - self.mean).T) def eigen_errors(self, y): return solve(self._eig, (y.T - self.mean).T) def chi2(self, y): return np.sum(self.individual_errors(y), axis=0) def md_squared(self, y): R"""The squared Mahalanobis distance""" return mahalanobis(y.T, self.mean, self._chol)**2 def kl(self, mean, cov): R"""The Kullbeck-Leibler divergence""" m1, c1, chol1 = self.mean, self.cov, self._chol m0, c0 = mean, cov tr = np.trace(cho_solve((chol1, True), c0)) dist = self.md_squared(m0) k = c1.shape[-1] logs = 2 * np.sum(np.log(np.diag(c1))) - np.linalg.slogdet(c0)[-1] return 0.5 * (tr + dist - k + logs) def credible_interval(self, y, intervals): """The credible interval diagnostic. Parameters ---------- y : (n_c, d) shaped array intervals : 1d array The credible intervals at which to perform the test """ lower, upper = self.udist.interval(np.atleast_2d(intervals).T) def diagnostic(data_, lower_, upper_): indicator = (lower_ < data_) & (data_ < upper_ ) # 1 if in, 0 if out return np.average(indicator, axis=1) # The diagnostic dci = np.apply_along_axis(diagnostic, axis=1, arr=np.atleast_2d(y).T, lower_=lower, upper_=upper) dci = np.squeeze(dci) return dci @staticmethod def variogram(X, y, bin_bounds): v = VariogramFourthRoot(X, y, bin_bounds) bin_locations = v.bin_locations gamma, lower, upper = v.compute(rt_scale=False) return v, bin_locations, gamma, lower, upper