def fit(self, X):
    self.X = X
    matX = np.asmatrix(self.X)
    self.T = X.shape[0]
    self.ndim = X.shape[1]
    # initialization schemes
    if self.init_method == 'random':
        self.A = np.zeros((self.num_gaussians, self.num_gaussians))
        self.A += 1.0 / self.num_gaussians
        self.A += np.random.rand(self.num_gaussians, self.num_gaussians) * 0.1
        self.A /= self.A.sum(axis=1, keepdims=True)  # each row of A is a distribution over next states
        self.pi = np.zeros(self.num_gaussians)
        self.pi += 1.0
        self.pi += np.random.rand(self.num_gaussians) * 0.01
        self.pi /= self.pi.sum()
        self.mu = X[np.random.choice(range(0, len(X)), self.num_gaussians), :]  # sample from the data
        self.sigma = list()
        self.B = np.zeros((self.T, self.num_gaussians))
        for k in range(self.num_gaussians):
            self.sigma.append(np.identity(self.ndim, dtype=np.float64))
            self.sigma[k] += np.random.rand(self.ndim, self.ndim)  # purely synthetic
            self.sigma[k] = np.dot(self.sigma[k], self.sigma[k].T)  # make it positive semi-definite
            self.sigma[k] /= self.sigma[k].sum()
            self.B[:, k] = normal(self.mu[k], self.sigma[k]).pdf(X)

    ######## BEGIN ACTUAL ALGORITHM ###################
    gamma = np.zeros((self.T, self.num_gaussians))  # gamma[t, i] = p(z_t = i | X, theta)
    ksi = np.zeros((self.num_gaussians, self.num_gaussians, self.T))  # ksi[j, i, t] = p(z_{t-1} = j, z_t = i | X, theta)
    for iter in range(self.max_iter):
        # E step
        alpha, beta, c = self.forward_backward()  # c holds the scaling factors
        ect = np.zeros((self.num_gaussians, self.num_gaussians))
        for i in range(self.num_gaussians):
            gamma[0, i] = alpha[0, i] * beta[0, i]
        for t in range(1, self.T):  # skip 0 - covered by pi
            for i in range(self.num_gaussians):
                gamma[t, i] = alpha[t, i] * beta[t, i]
                for j in range(self.num_gaussians):
                    ksi[j, i, t] = alpha[t-1, j] * beta[t, i] * self.A[j, i] * self.B[t, i] / c[t]
                    ect[j, i] += ksi[j, i, t]  # expected count of j -> i transitions: sum over t of p(z_{t-1}=j, z_t=i)
        # M step
        self.A = ect / ect.sum(axis=1, keepdims=True)  # normalize over i, the outgoing transitions of each j
        self.pi = gamma[0, :] / gamma[0, :].sum()
        for k in range(self.num_gaussians):
            norm = gamma[:, k].sum()
            self.mu[k] = np.dot(gamma[:, k], X) / norm
            intermed = np.multiply(gamma[:, k], (matX - self.mu[k]).T).T
            self.sigma[k] = np.dot(intermed.T, (matX - self.mu[k])) / norm  # same weighted-covariance update as in the Gaussian mixture
            self.B[:, k] = normal(self.mu[k], self.sigma[k]).pdf(X)  # recalculate the table of emission densities
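# The random initialization above relies on two invariants: each row of A must be a
# probability distribution over next states, and each sigma[k] must be symmetric
# positive semi-definite so the Gaussian emission pdf is well defined. A minimal
# self-contained sketch of that scheme (K and ndim are illustrative placeholders):
import numpy as np

K, ndim = 3, 2
rng = np.random.default_rng(0)

# near-uniform transition matrix plus a little noise, rows normalized to sum to 1
A = 1.0 / K + rng.random((K, K)) * 0.1
A /= A.sum(axis=1, keepdims=True)
assert np.allclose(A.sum(axis=1), 1.0)

# random symmetric positive semi-definite covariance: S @ S.T is always PSD
S = np.identity(ndim) + rng.random((ndim, ndim))
sigma = S @ S.T
assert np.all(np.linalg.eigvalsh(sigma) >= -1e-12)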
def _pdf(self, x, mu, left_sigma, right_sigma):
    # parameters may arrive wrapped in a length-1 sequence; unwrap them if so
    try:
        mu = list(mu)[0]
        left_sigma = list(left_sigma)[0]
        right_sigma = list(right_sigma)[0]
    except (TypeError, IndexError):
        pass
    left = normal(loc=mu, scale=left_sigma)
    right = normal(loc=mu, scale=right_sigma)
    # two-piece density: each half is rescaled by its own maximum so it peaks at 1
    return np.piecewise(x, [x < mu, x >= mu],
                        [lambda y: left.pdf(y) / np.max(left.pdf(y)),
                         lambda y: right.pdf(y) / np.max(right.pdf(y))])
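# A quick self-contained check of the same two-piece construction (this assumes
# `normal` is scipy.stats.norm, which the loc=/scale= keywords above require):
# both halves share the mode at mu and each is rescaled by its own maximum over
# the evaluated points, so the curve is continuous with value ~1 at mu.
import numpy as np
from scipy.stats import norm as normal

mu, left_sigma, right_sigma = 0.0, 1.0, 3.0
x = np.linspace(-5, 10, 1001)
left = normal(loc=mu, scale=left_sigma)
right = normal(loc=mu, scale=right_sigma)
y = np.piecewise(x, [x < mu, x >= mu],
                 [lambda v: left.pdf(v) / np.max(left.pdf(v)),
                  lambda v: right.pdf(v) / np.max(right.pdf(v))])
print(y[np.argmin(np.abs(x - mu))])  # ~1.0 at the shared mode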
def tune(self):
    # proposal covariance; mirrored the lone 0.1 entry so the matrix is symmetric,
    # which multivariate_normal requires
    cov = np.array([[ 1.0,  0.0, -0.5,  0.0,  0.0,  0.5],
                    [ 0.0,  1.0,  0.0, -0.5,  0.0,  0.5],
                    [-0.5,  0.0,  1.0,  0.0,  0.0,  0.0],
                    [ 0.0, -0.5,  0.0,  1.0,  0.1,  0.0],
                    [ 0.0,  0.0,  0.0,  0.1,  1.0,  0.0],
                    [ 0.5,  0.5,  0.0,  0.0,  0.0,  1.0]])
    for i in range(1, 11):
        s, accept_rat = mh(self.stationary,
                           lambda xp, xn: normal(xp, cov * i).pdf(xn),
                           lambda x: normal(x, cov * i).rvs(),
                           1000, [0, 0, 0, 0, 4, 5])
        print("Cov x " + str(i) + " accept_rat " + str(accept_rat))
def proposal_distribution(self, xprev, xnext):
    '''returns the probability of a transition from the previous sample (xprev) to the next sample (xnext)'''
    return normal(xprev, self.proposal_cov).pdf(xnext)
def main():
    set_printoptions(precision=3)
    X = [
        array([12.7, 6.6, 14.7, 12.2, 4.4, 7.8, 13.8, 13.7, 11.1, 9.1, 14.0]),
        array([17.1, 11.9, 12.7, 16.8, 15.0, 14.6, 13.7, 16.4]),
        array([5.2, 4.5, 10.5, 15.0, 5.0, 14.9, 7.6, 8.3, 10.8, 14.6, 15.1, 7.0, 9.3]),
        array([14.3, 16.2, 10.0, 13.1, 16.9, 11.2, 10.1, 18.3, 13.5, 15.0, 15.1, 14.8, 15.7, 13.2, 12.2, 13.2]),
        array([10.5, 7.5, 4.7, 12.5, 13.1, 13.5, 12.2, 16.1, 9.0, 17.9])
    ]
    sigma2 = 4.
    n = 8.
    # Get the posterior samples
    beta_post, tau2_post = get_beta_tau2_posterior_samples_MCMC(X, sigma2, n)
    # Calculate the CDF
    x2_mean = mean(X[1])
    x3_mean = mean(X[2])
    m2 = (sigma2 * beta_post / n + tau2_post * x2_mean) / (sigma2 / n + tau2_post)
    m3 = (sigma2 * beta_post / n + tau2_post * x3_mean) / (sigma2 / n + tau2_post)
    v2 = (sigma2 * tau2_post / n) / (sigma2 / n + tau2_post)
    rv = normal()
    for b in [0, 1, 3, 5]:
        Ps = rv.cdf((m2 - m3 - b) / sqrt(sigma2 * 2 + v2 * 2))
        print("For b = %d, P = %.3f" % (b, mean(Ps)))
def proposal_sampler_kth(self, x):
    '''returns a new sample based on the previous sample x'''
    bs = normal(x, self.proposal_cov_kth).rvs()
    bs[4] = abs(bs[4])  # component 4 must stay non-negative
    return bs
def proposal_distribution(self, xprev, xnext):
    '''returns the probability of a transition from the previous sample (xprev) to the next sample (xnext)'''
    cov = [[1, 0, 0],
           [0, 1, 0],
           [0, 0, 0.5]]
    return normal(xprev, cov).pdf(xnext)
def rvs(self, nums):
    ans = np.empty((nums, self.dim))
    for num in range(nums):
        for t in range(self.dim):
            temp_model = normal(0, self.sigma_2 / self.T[0, t])
            ans[num, t] = temp_model.rvs(1)[0]
    return ans
def get_mcmc(self, k):
    if k in self.dimension2mcmc:
        return self.dimension2mcmc[k]
    blr = self.stat_factory.get_stationary(k)
    prop = ProposalDistribution2(normal(np.zeros(blr.n), 5 * np.eye(blr.n)))
    mcmc = Mcmc(prop, blr)
    self.dimension2mcmc[k] = mcmc
    return mcmc
def __init__(self, xs, ys, n_breaks):
    '''
    @param xs - x coordinates of the data
    @param ys - y coordinates of the data
    @param n_breaks - number of breakpoints
    '''
    if len(xs) != len(ys):
        raise RuntimeError("Dimensions of xs and ys do not match")
    self.xs = xs
    self.ys = ys
    self.max_x = max(xs)
    self.min_x = min(xs)
    self.n = 2 * n_breaks + 5
    self.n_samples = len(xs)
    self.h_prior = normal(np.zeros(int((self.n - 1) / 2)), 100 * np.eye(int((self.n - 1) / 2)))
    self.sigma_prior = normal(0, 3)
    self.n_breaks = n_breaks
def __init__(self, d, noise=0, w=None):
    DataSet.__init__(self, d, noise=noise)
    self.dist = normal(loc=0.0, scale=1.0 / sqrt(d))
    if w is None:
        self.w = self.dist.rvs(size=self.d)
    else:
        self.w = w.astype(float64)
def pred(self, test_data):
    # use the learned gaussians to predict the most likely component and the total likelihood of each point
    num_pts = len(test_data)
    p = np.zeros((num_pts, self.num_gaussians), dtype=np.float64)
    for k in range(self.num_gaussians):
        normal_var = normal(mean=self.mu[k], cov=self.sigma[k])
        p[:, k] = self.lmbda[k] * normal_var.pdf(test_data)
    pred_labels = p.argmax(axis=1)
    likelihood = p.sum(axis=1)
    return pred_labels, likelihood
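# The prediction rule above is just argmax_k lambda_k * N(x | mu_k, sigma_k).
# A self-contained sketch with made-up parameters (all names here are
# illustrative, not taken from the class):
import numpy as np
from scipy.stats import multivariate_normal as normal

mu = [np.array([0.0, 0.0]), np.array([4.0, 4.0])]
sigma = [np.eye(2), 2 * np.eye(2)]
lmbda = [0.6, 0.4]
test_data = np.array([[0.2, -0.1], [3.8, 4.3]])

p = np.column_stack([lmbda[k] * normal(mean=mu[k], cov=sigma[k]).pdf(test_data)
                     for k in range(2)])
print(p.argmax(axis=1))  # [0 1]: each point is assigned to its nearest component
print(p.sum(axis=1))     # mixture density at each point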
class Quad2D(Quad):
    from . import qf2d
    x_dot = staticmethod(qf2d.x_dot)
    step_eul = staticmethod(qf2d.step_eul)
    step_array = staticmethod(qf2d.step_array)

    #                     x  y  z  qi  qj  qk  qr  vx  vy  vz  wx  wy  wz  w0 w1 w2 w3
    x_sample_mean = np.r_[0, 0, 0, 0, .2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    x_sample_std = np.r_[1, 0, 1, 0, .1, 0, .1, .5, 0, .5, 0, .1, 0, 0, 0, 0, 0]
    # note: several components have zero std; if `normal` is scipy's
    # multivariate_normal, the implied diagonal covariance is singular and
    # allow_singular=True would be needed
    state_dist = normal(x_sample_mean, x_sample_std)
def znorm_fixed(self, mean, sd):
    self.analyse()
    for (sim, neigh) in self.tuplelist:
        p = normal(mean, sd).cdf(sim)
        self.allsims[neigh] = p
    self.analyse()
    self.tuplelist = []
    self.topk(self.getk())
def main():
    set_printoptions(precision=3)
    P = zeros(6)
    for i, deltaq in enumerate([30, 32, 34, 36, 38, 40]):
        tau2 = get_tau2_posterior_samples_MCMC(deltaq)
        rv = normal()
        numer = 65.2 - deltaq - deltaq * tau2 / (4 + tau2)
        denom = sqrt(8 + 8 * tau2 / (4 + tau2))
        Ps = 1 - rv.cdf(numer / denom)
        P[i] = mean(Ps)
    print(P)
def neural_next_state(state, delta, J, T, N, g=tanh):
    """neural_next_state: given the previous state, J, and timestep, return the next state in the neural model

    :param state: previous state
    :param delta: timestep
    :param J: interaction matrix
    :param T: temperature of the noise
    :param N: system size
    :param g: transfer function applied to the state
    """
    return state + delta * (-state + J @ g(state)) + \
        normal(0, sqrt(2 * T * delta)).rvs(N)
def _probKeep(self, pos):
    if len(self.locations) == 0:
        return 1.
    # compute a Metropolis-Hastings acceptance probability for the mixture model
    norm = normal(pos, cov=0.4 * np.eye(2))
    currPdf = norm.pdf(pos)
    proposalPdf = 0.
    for loc, _ in self.locations:
        proposalPdf += norm.pdf(loc)
    proposalPdf /= len(self.locations)
    acceptProb = proposalPdf / currPdf
    return acceptProb
def blr_1break(xs, ys):
    '''
    returns a function that computes the posterior density for Bayesian
    linear regression with a single breakpoint
    xs, ys - xs independent values, ys observed values
    b00, b10 - slope of the line before, respectively after, the breakpoint
    b0 - intercept: y = b00*x + b0
    sigma - variance
    switch - breakpoint location
    '''
    prior_b00 = lambda x: normal(0, 3).pdf(x)
    prior_b10 = lambda x: normal(1, 3).pdf(x)
    prior_b0 = lambda x: normal(0, 5).pdf(x)
    prior_sigma = lambda x: normal(4, 3).pdf(x)
    prior_switch = lambda x: normal(5, 2).pdf(x)

    def prob_density(x):
        b00, b10, b0, sigma, switch = x
        prob = 0
        n = len(xs)
        for i, xi in enumerate(xs):
            if xi < switch:
                prob += (ys[i] - (xi * b00 + b0))**2
            else:
                prob += (ys[i] - ((xi - switch) * b10 + (b00 * switch + b0)))**2
        sigma = abs(sigma)  # the variance must be positive
        prob = sigma**(-n / 2) * np.exp(-prob / (2 * sigma))
        return np.prod([
            prob,
            prior_b00(b00),
            prior_b10(b10),
            prior_b0(b0),
            prior_sigma(sigma),
            prior_switch(switch)
        ])

    return prob_density
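# A minimal usage sketch for blr_1break, assuming `normal` in its module is bound
# to scipy.stats.norm (scalar loc/scale, as the priors require): generate data
# with a known break at x = 5 and compare the unnormalized posterior at the true
# parameters with a deliberately misplaced breakpoint.
import numpy as np
from scipy.stats import norm as normal

rng = np.random.default_rng(1)
xs = np.linspace(0, 10, 50)
ys = np.where(xs < 5.0,
              0.5 * xs + 1.0,
              (xs - 5.0) * 2.0 + (0.5 * 5.0 + 1.0)) + rng.normal(0, 0.5, 50)

density = blr_1break(xs, ys)
print(density([0.5, 2.0, 1.0, 0.5, 5.0]))  # b00, b10, b0, sigma, switch (truth)
print(density([0.5, 2.0, 1.0, 0.5, 8.0]))  # misplaced breakpoint scores lower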
def __init__(self, d, p=2.0, q=2.0, margin=0.5, noise=0, w=None):
    DataSet.__init__(self, d, noise=noise)
    self.p = p
    self.q = q
    self.margin = margin
    self.dist = normal(loc=0.0, scale=1.0 / sqrt(d))
    if w is None:
        self.w = self.dist.rvs(size=self.d)
    else:
        self.w = w.astype(float64)
    if self.q is not None:
        self.w /= norm(self.w, self.q)
def spherical_next_state(state, delta, J, T, N):
    """spherical_next_state: given the previous state, J, and timestep, return the next state in the spherical model

    :param state: previous state
    :param delta: timestep
    :param J: interaction matrix
    :param T: temperature of the noise
    :param N: system size
    """
    return state + delta * \
        (-state / N * (state.T @ J @ state) + J @ state) + \
        normal(0, sqrt(2 * T * delta)).rvs(N) @ (identity(N) - 1 / N * outer(state, state))
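# A short simulation sketch for the spherical update (hedged and self-contained;
# the imports mirror what the function expects: `normal` as scipy.stats.norm and
# sqrt/identity/outer from numpy). The drift conserves |x|^2 = N and the noise is
# projected onto the tangent space, so for weak noise the normalized squared
# radius stays near 1 up to O(delta) discretization error.
import numpy as np
from numpy import sqrt, identity, outer
from scipy.stats import norm as normal

N, delta, T_noise, steps = 50, 0.01, 1e-3, 500
rng = np.random.default_rng(2)
J = rng.normal(0, 1 / sqrt(N), (N, N))
J = (J + J.T) / 2                          # symmetric couplings
state = rng.normal(size=N)
state *= sqrt(N) / np.linalg.norm(state)   # start on the sphere |x|^2 = N

for _ in range(steps):
    state = spherical_next_state(state, delta, J, T_noise, N)
print(state @ state / N)  # approximately 1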
def adjustsims(self, myBless, meanpoly, sdpoly):
    (_, w1) = myBless.countdict.get(self.word, (0, 0))
    for (sim, neigh) in self.tuplelist:
        (_, w2) = myBless.countdict.get(untag(neigh, '/'), (0, 0))
        jointwidth = widthfunction(w1, w2)
        mean = meanpoly(jointwidth)
        sd = sdpoly(jointwidth)
        p = normal(mean, sd).cdf(sim)
        self.allsims[neigh] = p
    self.tuplelist = []
    self.topk(self.getk())
def znorm(self):
    # estimate normal dist params and transform similarities into normal probabilities
    self.analyse()
    for (sim, neigh) in self.tuplelist:
        p = normal(self.avgsim, self.sd).cdf(sim)
        self.allsims[neigh] = p
    self.analyse()
    self.tuplelist = []
    self.topk(self.getk())
def stationary1(dataset1):
    '''returns a function that computes the posterior density value for a sample of parameters'''
    prior_mu1 = normal(0, 10).pdf
    prior_sigma1 = lambda x: normal(0, 10).pdf(x) * uniform(0, 10).pdf(x)

    def likelihood1(x, mu1, sigma1):
        if sigma1 < 0:
            raise Exception("Variance can't be negative")
        # build a fresh frozen distribution; mutating the mean/cov attributes of
        # an existing frozen scipy distribution does not change its pdf
        return normal(mu1, sigma1).pdf(x)

    def prob_density(sample):
        if not len(sample) == 2:
            raise Exception("Wrong sample length, actual sample length: " + str(len(sample)))
        mu1 = sample[0]
        sigma1 = sample[1]
        probabilities = np.zeros(len(dataset1))
        for i, x in enumerate(dataset1):
            probabilities[i] = likelihood1(x, mu1, sigma1)
        probability = np.prod(probabilities)
        return probability * prior_mu1(mu1) * prior_sigma1(sigma1)

    return prob_density
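# A hedged sketch of plugging such a stationary density into a random-walk
# Metropolis loop. The `mh(stationary, proposal_pdf, proposal_sampler, n, x0)`
# helper used in `tune` above would do the same job; a tiny standalone loop is
# written out here so the example runs on its own (assumes `normal` is
# scipy.stats.norm and `uniform` is scipy.stats.uniform, as stationary1 needs).
import numpy as np
from scipy.stats import norm as normal, uniform

rng = np.random.default_rng(3)
dataset1 = rng.normal(2.0, 1.5, size=30)
target = stationary1(dataset1)

x = np.array([0.0, 1.0])                   # initial (mu1, sigma1)
samples, accepted = [], 0
for _ in range(2000):
    prop = x + rng.normal(0, 0.3, size=2)  # symmetric random-walk proposal
    # accept with probability min(1, target(prop) / target(x))
    if prop[1] > 0 and target(prop) > rng.random() * target(x):
        x, accepted = prop, accepted + 1
    samples.append(x)
print(np.mean(samples, axis=0), accepted / 2000)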
def znorm(self):
    # estimate normal dist params and transform similarities into normal probabilities
    self.analyse()
    if ThesEntry.debug:
        print(self.word, self.avgsim, self.sd)
    for (sim, neigh) in self.tuplelist:
        p = normal(self.avgsim, self.sd).cdf(sim)
        self.allsims[neigh] = p
    self.analysed = False
    self.analyse()
    if ThesEntry.debug:
        print(self.word, self.avgsim, self.sd)
    self.tuplelist = []
def blr_nbreaks(self, xs, ys):
    '''
    returns a function that computes the posterior density for Bayesian
    linear regression with n breakpoints
    xs, ys - xs independent values, ys observed values
    b0 - intercept: y = b00*x + b0
    sigma - variance
    b1s - n+1 line slopes
    switches - n breakpoints
    '''
    # the first entries are always b0 and sigma, then b1s and switches,
    # so the length of one sample is always 1 + 1 + (n+1) + n = 3 + 2n
    prior = normal(prior_mu, prior_cov).pdf  # prior_mu, prior_cov assumed defined in the enclosing scope
    n_breaks = self.n_breaks

    def prob_density(x):
        assert len(x) == self.dimension
        b0 = x[0]
        sigma = x[1]
        b1s = x[2:3 + n_breaks]
        switches = x[3 + n_breaks:]
        # append infinity to the switches so each point only needs testing from one side
        switches = np.append(switches, np.inf)
        prob = 0
        n = len(xs)
        for i, xi in enumerate(xs):
            # accumulate the intercept of the segment the point falls into
            for j, switch in enumerate(switches):
                if j == 0:
                    a = b0
                else:
                    a = a + b1s[j - 1] * switches[j - 1] - b1s[j] * switches[j - 1]
                if xi < switch:
                    prob += (ys[i] - (xi * b1s[j] + a))**2
                    break
        sigma = abs(sigma)  # the variance must be positive
        prob = sigma**(-n / 2) * np.exp(-prob / (2 * sigma))
        return prob * prior(x)

    return prob_density
def pred(self, test_data):
    # calculate the best labels (settings for z) and the log likelihood p(X | A, mu, pi, sigma)
    if len(test_data) != self.T:
        raise ValueError("Testing data does not match the length of the training data.")
    alpha, beta, c = self.forward_backward(test_data)  # c holds the scaling factors
    gamma = alpha * beta  # gamma[t, i] = p(z_t = i | X, theta)
    pred_labels = gamma.argmax(axis=1)  # predicted labels, i.e. choices of the latent variable z
    testB = np.zeros((self.T, self.num_gaussians))  # testB[t, i] = p(x_t | z_t = i, theta) (emission probability)
    for k in range(self.num_gaussians):
        testB[:, k] = normal(self.mu[k], self.sigma[k]).pdf(test_data)
    ksi = np.zeros((self.num_gaussians, self.num_gaussians, self.T))  # ksi[j, i, t] = p(z_{t-1} = j, z_t = i | X, theta)
    for t in range(1, self.T):  # skip 0 - covered by pi
        for i in range(self.num_gaussians):
            for j in range(self.num_gaussians):
                ksi[j, i, t] = alpha[t - 1, j] * beta[t, i] * self.A[j, i] * testB[t, i] / c[t]
    log_likelihood = np.log(c).sum()
    return pred_labels, log_likelihood
def __init__(self, xs, ys, n_breaks):
    '''
    @param xs - x coordinates of the data
    @param ys - y coordinates of the data
    @param n_breaks - number of breakpoints
    '''
    if len(xs) != len(ys):
        raise RuntimeError("Not matching dimension. Dimension xs=" + str(len(xs)) +
                           " Dimension ys=" + str(len(ys)))
    self.xs = xs
    self.ys = ys
    self.max_y = max(ys)
    self.min_y = min(ys)
    self.max_x = max(xs)
    self.min_x = min(xs)
    self.n = 2 * n_breaks + 5
    self.n_samples = len(xs)
    self.sigma_prior = normal(3, 0.2)
    self.n_breaks = n_breaks
    self.h_prior = uniform(min(ys) - 5, (max(ys) + 5) - (min(ys) - 5))
def get_moves_from(self, k):
    if k in self.dimension_to_moves:
        return self.dimension_to_moves[k]
    print("Creating moves from " + str(k))
    moves = []
    u = uniform(0, 1)
    n = normal(0, 1)
    step = 1
    if 5 < k <= 10:
        step = 2
    if 10 < k <= 15:
        step = 3
    if 15 < k <= 20:
        step = 5
    if 20 < k:
        step = 7
    # moves up
    for i in range(0, k + 1, step):
        t_up, j_up = self._create_transformation_up(k, i)
        t_down, j_down = self._create_transformation_down(k + 1, i)
        u_gen_up = ProposalDistribution(
            lambda x: u.pdf(x[0]) * n.pdf(x[1]),
            lambda: [u.rvs(), n.rvs()])
        move = Move(k, k + 1, 0.05 / (k + 1), 0.05 / (k + 1), t_up, t_down,
                    j_up, j_down, u_gen_up, None, 2, 0)
        moves.append(move)
    # moves down
    for i in range(0, k, step):
        t_up, j_up = self._create_transformation_up(k - 1, i)
        t_down, j_down = self._create_transformation_down(k, i)
        u_gen_up = ProposalDistribution(
            lambda x: u.pdf(x[0]) * n.pdf(x[1]),
            lambda: [u.rvs(), n.rvs()])
        move = Move(k - 1, k, 0.05 / k, 0.05 / k, t_up, t_down,
                    j_up, j_down, u_gen_up, None, 2, 0)
        moves.append(move)
    self.dimension_to_moves[k] = moves
    return moves
def getProposal2():
    cov = np.array([[3, 0, 0, 0],
                    [0, 3, 0, 0],
                    [0, 0, 3, 0],
                    [0, 0, 0, 3]])

    def proposal(x, y):
        if not len(x) == 4:
            raise Exception("Wrong sample length")
        if not len(y) == 4:
            raise Exception("Wrong sample length")
        # build a fresh frozen distribution centered at x; assigning to the mean
        # of an existing frozen distribution has no effect on its pdf
        return normal(x, cov).pdf(y)

    return proposal
from scipy.stats import multivariate_normal as normal
import numpy as np
from time import time
from experiments.lnpdfs.create_target_lnpfs import build_GPR_iono_lnpdf
from sampler.SliceSampling.slice_sampler import slice_sample

num_dimensions = 35
lnpdf = build_GPR_iono_lnpdf()[0]
prior = normal(np.zeros(num_dimensions), np.eye(num_dimensions))
initial = prior.rvs(1)


def sample(n_samps, sigma, path):
    start = time()
    samples = slice_sample(lnpdf, initial, n_samps, sigma * np.ones(num_dimensions), path)
    end = time()
    np.savez(path, samples=samples, wallclocktime=end - start, n_fevals=lnpdf.counter)
    print("done")
def fit(self, X):
    self.X = X
    self.N = X.shape[0]
    self.ndim = X.shape[1]
    np.random.seed(self.random_seed)
    matX = np.asmatrix(X)

    # initialization schemes
    if self.init_method == 'random':
        if self.init_means is not None:
            mu = self.init_means
        else:
            mu = X[np.random.choice(range(0, len(X)), self.num_gaussians), :]  # sample from the data
        if self.init_cov is not None:
            sigma = self.init_cov
        else:
            sigma = list()
            for k in range(self.num_gaussians):
                sigma.append(np.identity(self.ndim, dtype=np.float64))
                sigma[k] += np.random.rand(self.ndim, self.ndim)  # purely synthetic
                sigma[k] = np.dot(sigma[k], sigma[k].T)  # making it positive semi-definite and symmetric
                sigma[k] /= sigma[k].sum()
        if self.init_weights is not None:
            lmbda = self.init_weights
        else:
            lmbda = np.random.rand(self.num_gaussians)
            lmbda /= lmbda.sum()
    elif self.init_method == 'kmeans':
        # use the means of kmeans as initial means, and calculate covariances from the clusters
        model = KMeans(K=self.num_gaussians, max_iter=5)
        model.fit(X)
        labels = model.pred(X)
        mu = np.zeros((self.num_gaussians, self.ndim))
        sigma = [np.zeros((self.ndim, self.ndim))] * self.num_gaussians
        for k in range(self.num_gaussians):
            cluster = X[labels == k]
            mu[k] = cluster.mean(axis=0)
            sigma[k] = np.cov(cluster.T)
        if self.init_weights is not None:
            lmbda = self.init_weights
        else:
            lmbda = np.random.rand(self.num_gaussians)
            lmbda /= lmbda.sum()

    ######## BEGIN ACTUAL ALGORITHM ###################
    for iter in range(self.max_iter):
        phat = np.zeros((self.N, self.num_gaussians))
        N = np.zeros(self.num_gaussians)
        # E step: phat[n, k] is the responsibility of component k for point n
        # (faster to do it all with numpy than to loop over the points)
        for k in range(0, self.num_gaussians):
            normal_var = normal(mean=mu[k], cov=sigma[k])
            phat[:, k] = lmbda[k] * normal_var.pdf(X)
        phat /= phat.sum(axis=1)[:, None]
        # M step
        for k in range(self.num_gaussians):
            N[k] = phat[:, k].sum()
            mu[k] = np.dot(phat[:, k], X) / N[k]
            intermed = np.multiply(phat[:, k], (matX - mu[k]).T).T
            sigma[k] = np.dot(intermed.T, (matX - mu[k])) / N[k]
            lmbda[k] = N[k] / self.N

    self.mu = mu
    self.sigma = sigma
    self.lmbda = lmbda
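# The E and M updates above can be exercised on their own; a self-contained
# sketch of the same EM iteration on two synthetic blobs (all variable names are
# local to the example):
import numpy as np
from scipy.stats import multivariate_normal as normal

rng = np.random.default_rng(4)
X = np.vstack([rng.normal(0, 1, (100, 2)), rng.normal(5, 1, (100, 2))])
mu = np.array([[0.5, -0.5], [4.0, 4.5]])  # rough initial means
sigma = [np.eye(2), np.eye(2)]
lmbda = np.array([0.5, 0.5])

for _ in range(20):
    # E step: responsibilities
    phat = np.column_stack([lmbda[k] * normal(mu[k], sigma[k]).pdf(X) for k in range(2)])
    phat /= phat.sum(axis=1, keepdims=True)
    # M step: weighted means, covariances, and mixture weights
    for k in range(2):
        Nk = phat[:, k].sum()
        mu[k] = phat[:, k] @ X / Nk
        d = X - mu[k]
        sigma[k] = (phat[:, k, None] * d).T @ d / Nk
        lmbda[k] = Nk / len(X)
print(mu)  # should approach [[0, 0], [5, 5]]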
def forward_backward(self, X=None):
    T = self.T
    A = self.A
    if X is None:
        B = self.B  # training
    else:
        B = np.zeros((T, self.num_gaussians))  # testing
        for k in range(self.num_gaussians):
            B[:, k] = normal(self.mu[k], self.sigma[k]).pdf(X)

    # Scaled forward-backward recursions as in Bishop 13.2: alpha and beta are
    # renormalized at every step and the scaling factors collected in c, so that
    # log p(X) = sum_t log c[t].
    # a == alpha, b == beta
    c = np.zeros(T)
    a = np.zeros((T, self.num_gaussians))
    b = np.zeros((T, self.num_gaussians))
    for j in range(self.num_gaussians):
        a[0, j] = self.pi[j] * B[0, j]
    c[0] = a[0, :].sum()
    a[0, :] /= c[0]
    for t in range(1, T):  # vectorized: a[t, i] = sum_j a[t-1, j] * A[j, i] * B[t, i]
        a[t, :] = np.dot(A.T, a[t-1, :]) * B[t, :]
        c[t] = a[t, :].sum()
        a[t, :] /= c[t]
    for j in range(self.num_gaussians):
        b[T-1, j] = 1
    for t in range(T-2, -1, -1):  # b[t, i] = sum_j b[t+1, j] * A[i, j] * B[t+1, j]
        b[t, :] = np.dot(A, b[t+1, :] * B[t+1, :])
        b[t, :] /= c[t+1]
    return a, b, c
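# The identity log p(X) = sum_t log c_t that `pred` relies on can be verified on
# a tiny discrete chain by brute-force enumeration of all state paths (a
# self-contained sketch; emission probabilities B are given directly here
# instead of being computed from Gaussian densities):
import numpy as np
from itertools import product

K, T = 2, 4
rng = np.random.default_rng(5)
pi = np.array([0.6, 0.4])
A = np.array([[0.7, 0.3], [0.2, 0.8]])  # rows sum to 1
B = rng.random((T, K))                  # stand-in emission densities

# scaled forward pass
a = pi * B[0]
c = [a.sum()]
a /= c[0]
for t in range(1, T):
    a = (A.T @ a) * B[t]
    c.append(a.sum())
    a /= c[-1]

# brute force: sum the joint probability over all K**T latent paths
total = 0.0
for z in product(range(K), repeat=T):
    p = pi[z[0]] * B[0, z[0]]
    for t in range(1, T):
        p *= A[z[t-1], z[t]] * B[t, z[t]]
    total += p
print(np.log(c).sum(), np.log(total))  # the two values agree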
from time import time
import numpy as np
import os
from scipy.stats import multivariate_normal as normal
from experiments.lnpdfs.create_target_lnpfs import build_GPR2_iono_lnpdf
from sampler.elliptical_slice.bovy_mcmc.elliptical_slice import elliptical_slice as ess_update

num_dimensions = 34
prior_cov = 10 * np.eye(num_dimensions)
#prior_cov[0,0] = 1.
prior_chol = np.sqrt(prior_cov)  # elementwise sqrt works because prior_cov is diagonal
prior = normal(np.zeros(num_dimensions), prior_cov)
target_lnpdf = build_GPR2_iono_lnpdf(prior_on_variance=False)[0]


def sample(n_samps, path=None):
    if path is not None:
        dirname = os.path.dirname(path)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
    iters = []
    nfevals = []
    target_lnpdf.counter = 0
    start = time()
    timestamps = []
    cur_theta = prior.rvs(1)
    cur_lnpdf = target_lnpdf(cur_theta, without_prior=True)
    all_samples = []
def __init__(self, d, dist1mean, dist2mean, cov, size, noise=0, w=None):
    x = rnd.multivariate_normal(dist1mean, cov, size)
    z = rnd.multivariate_normal(dist2mean, cov, size)  # needed below; was commented out
    '''
    # disabled rejection-sampling construction of the two clusters:
    i = 0
    while i < size:
        temp = rnd.multivariate_normal(dist1mean, cov)
        if norm(temp, 2) <= 1:
            z[i] = temp
            i += 1
    i = 0
    while i < size:
        temp = rnd.multivariate_normal(dist1mean, cov)
        if norm(temp, 2) > 1.2:
            x[i] = temp
            i += 1
    '''
    temp = []
    for i in range(size):
        # change here for the separable and non-separable dataset
        #temp.append([z[i], 1])
        #temp.append([x[i], -1])
        temp.append(z[i])
        #temp.append(x[i])
    self.data = temp
    self.n = size
    self.shuffle = True
    self.repeat = True
    self.maxnorm = self.calculatemaxnorm()
    DataSet.__init__(self, d, extend=False, norm_p=True, noise=noise)
    if w is None:
        # draw the weight vector (the original assigned the frozen distribution itself)
        self.w = normal(loc=0.0, scale=1.0 / sqrt(d)).rvs(size=self.d)
    else:
        self.w = w.astype(float64)
Bhat = np.dot(BhatI, BhatR)  # BhatI, BhatR computed earlier (not shown)
An = np.dot(T(X), X) + A0
Ainv = npla.inv(An)
munleft = npla.inv(An)
munright = np.dot(A0, mu0) + np.dot(T(X), np.dot(X, Bhat))
mun = np.dot(munleft, munright)
bn = b0 + 0.5 * (np.dot(T(Y), Y) + np.dot(T(mu0), np.dot(A0, mu0)) - np.dot(T(mun), np.dot(An, mun)))
an = a0 + n / 2
mean = mun[:, 0]
q = bn[0][0]

# normal-inverse-gamma posterior: B[0:2] | B[2] ~ N(mean, |B[2]| * Ainv), B[2] ~ InvGamma(an, q)
stat = lambda B: normal(mean, abs(B[2]) * Ainv).pdf(B[0:2]) * invgamma(a=an, loc=0, scale=q).pdf(B[2])

k1 = 0.05
k2 = 0.05
k3 = 5
cov_sampler = np.array([[k1, 0, 0],
                        [0, k2, 0],
                        [0, 0, k3]])
prop = lambda x, xi: normal(xi, cov_sampler).pdf(x)
prop_sampler = lambda x: normal(x, cov_sampler).rvs()

cov_sampler2 = np.array([[20, 0.5, 0],
                         [0.5, 20, 0.5],
                         [0, 0.5, 20]])
prop2 = lambda x, xi: normal(xi, cov_sampler2).pdf(x)
prop_sampler2 = lambda x: normal(x, cov_sampler2).rvs()
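# A hedged sketch of how these pieces fit together, assuming `T` is matrix
# transpose, `npla` is numpy.linalg, and `normal`/`invgamma` come from
# scipy.stats: the conjugate update is computed for tiny synthetic data and the
# resulting density `stat` is evaluated at a point near the truth.
import numpy as np
import numpy.linalg as npla
from scipy.stats import multivariate_normal as normal, invgamma

T = np.transpose  # transpose helper, as assumed by the code above
rng = np.random.default_rng(6)

n = 20
X = np.column_stack([np.ones(n), rng.random(n)])  # design matrix with intercept
Y = (X @ np.array([[1.0], [2.0]])) + rng.normal(0, 0.5, (n, 1))

A0 = np.eye(2)                                    # prior precision of the coefficients
mu0 = np.zeros((2, 1))
a0, b0 = 2.0, 1.0                                 # inverse-gamma prior on the noise variance

Bhat = npla.inv(T(X) @ X) @ T(X) @ Y              # OLS estimate
An = T(X) @ X + A0
mun = npla.inv(An) @ (A0 @ mu0 + T(X) @ X @ Bhat)
bn = b0 + 0.5 * (T(Y) @ Y + T(mu0) @ A0 @ mu0 - T(mun) @ An @ mun)
an = a0 + n / 2

stat = lambda B: normal(mun[:, 0], abs(B[2]) * npla.inv(An)).pdf(B[0:2]) \
    * invgamma(a=an, loc=0, scale=bn[0, 0]).pdf(B[2])
print(stat(np.array([1.0, 2.0, 0.25])))           # posterior density near the truth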