def fit_gmm(self, samples):
    """
    Fits a Gaussian mixture to ``samples`` with several independent EM runs
    and returns the best run converted to the internal MixtureDistribution
    representation.

    ``samples`` is transposed before being wrapped in RealFeatures.
    ``self.num_runs_em`` EM runs are performed, each on a freshly created
    GMM with ``self.num_components`` components; the run whose ``train_em()``
    return value is largest is kept.
    """
    features = RealFeatures(samples.T)

    # one score slot per EM run; every run uses its own new GMM instance
    # (corresponds to a random initialisation)
    em_scores = zeros(self.num_runs_em)
    candidates = []
    for run in range(self.num_runs_em):
        candidate = GMM(self.num_components)
        candidate.set_features(features)
        em_scores[run] = candidate.train_em()
        candidates.append(candidate)

    # keep the run with the largest value returned by train_em()
    best = candidates[em_scores.argmax()]

    # translate every fitted component into an internal Gaussian
    components = [
        Gaussian(best.get_nth_mean(k), best.get_nth_cov(k))
        for k in range(self.num_components)
    ]

    # mixture weights of the best run become the Discrete mixing proportion
    weights = best.get_coef()
    return MixtureDistribution(
        components[0].dimension,
        self.num_components,
        components,
        Discrete(weights),
    )
def construct_proposal(self, y):
    """
    Builds a mixture proposal with ``self.num_eigen`` Gaussian components,
    all centred at the one-dimensional array ``y``.

    Component ``k`` is constructed from the column factor
    ``sqrt(dwscale[k] * eigvalues[k]) * eigvectors[:, k]`` (passed with
    ``is_cholesky=True, ell=1``). The mixing weights are
    ``(eigvalues + 1) / (sum(eigvalues) + num_eigen)``.
    """
    assert len(shape(y)) == 1

    proposal = MixtureDistribution(self.distribution.dimension, self.num_eigen)

    weights = (self.eigvalues + 1) / (sum(self.eigvalues) + self.num_eigen)
    proposal.mixing_proportion = Discrete(weights)

    for k in range(self.num_eigen):
        scale = sqrt(self.dwscale[k] * self.eigvalues[k])
        # eigenvector as a (dimension, 1) column
        column = reshape(self.eigvectors[:, k], (self.distribution.dimension, 1))
        proposal.components[k] = Gaussian(y, scale * column, is_cholesky=True, ell=1)

    return proposal
def __init__(self, dimension=2, num_components=2, components=None, mixing_proportion=None):
    """
    Sets up a mixture with ``num_components`` components.

    dimension         - forwarded to Distribution.__init__
    num_components    - number of mixture components
    components        - optional sequence of component distributions; its
                        length must equal num_components. When omitted, every
                        component is a Gaussian with zero mean and identity
                        covariance of size self.dimension.
    mixing_proportion - optional object exposing ``num_objects`` (must equal
                        num_components). When omitted, a Discrete with equal
                        weights 1/num_components is used.

    Raises AssertionError when a supplied argument disagrees with
    num_components.
    """
    Distribution.__init__(self, dimension)
    self.num_components = num_components

    # Identity test instead of ``== None``: equality would be evaluated
    # element-wise for array-like arguments and depends on the argument's
    # own __eq__ implementation.
    if components is None:
        self.components = [
            Gaussian(mu=zeros(self.dimension), Sigma=eye(self.dimension))
            for _ in range(self.num_components)
        ]
    else:
        assert len(components) == self.num_components
        self.components = components

    if mixing_proportion is None:
        self.mixing_proportion = Discrete(
            (1.0 / num_components) * ones([num_components]))
    else:
        assert num_components == mixing_proportion.num_objects
        self.mixing_proportion = mixing_proportion
class MixtureDistribution(Distribution):
    """
    Mixture of component distributions.

    mixing_proportion is of class Distribution->Discrete
    components is a list of Distributions
    """

    def __init__(self, dimension=2, num_components=2, components=None, mixing_proportion=None):
        """
        dimension         - forwarded to Distribution.__init__
        num_components    - number of mixture components
        components        - optional sequence of length num_components;
                            defaults to Gaussians with zero mean and identity
                            covariance of size self.dimension
        mixing_proportion - optional object whose ``num_objects`` equals
                            num_components; defaults to a Discrete with equal
                            weights 1/num_components

        Raises AssertionError when a supplied argument disagrees with
        num_components.
        """
        Distribution.__init__(self, dimension)
        self.num_components = num_components

        # ``is None`` rather than ``== None``: equality is element-wise for
        # array-like arguments and otherwise depends on the argument's __eq__
        if components is None:
            self.components = [
                Gaussian(mu=zeros(self.dimension), Sigma=eye(self.dimension))
                for _ in range(self.num_components)
            ]
        else:
            assert len(components) == self.num_components
            self.components = components

        if mixing_proportion is None:
            self.mixing_proportion = Discrete(
                (1.0 / num_components) * ones([num_components]))
        else:
            assert num_components == mixing_proportion.num_objects
            self.mixing_proportion = mixing_proportion

    def __str__(self):
        """
        Returns "<ClassName>=[components=..., mixing_proportion=..., <base str>]".
        """
        s = self.__class__.__name__ + "=["
        s += "components=" + str(self.components)
        s += ", mixing_proportion=" + str(self.mixing_proportion)
        s += ", " + Distribution.__str__(self)
        s += "]"
        return s

    def log_pdf(self, X, component_index_given=None):
        """
        If component_index_given is given, then just condition on it and
        return that component's log_pdf(X); otherwise compute the overall
        mixture log_pdf, one value per entry X[ii].

        Raises AssertionError if component_index_given is not smaller than
        num_components.
        """
        if component_index_given is None:
            rez = zeros([len(X)])
            for ii in range(len(X)):
                # log-density of point ii under every component
                logpdfs = zeros([self.num_components])
                for jj in range(self.num_components):
                    logpdfs[jj] = self.components[jj].log_pdf([X[ii]])
                # shift by the largest log-density before exponentiating
                # (log-sum-exp), weighting by mixing_proportion.omega
                lmax = max(logpdfs)
                rez[ii] = lmax + log(sum(self.mixing_proportion.omega * exp(logpdfs - lmax)))
            return rez
        else:
            assert component_index_given < self.num_components
            return self.components[component_index_given].log_pdf(X)

    def sample(self, n=1):
        """
        Draws n rows: for each row a component is picked via
        mixing_proportion.sample() and one sample of that component is
        stored. Returns SampleFromMixture(rows, which_component).
        """
        rez = zeros([n, self.dimension])
        for ii in range(n):
            which_component = self.mixing_proportion.sample().samples
            rez[ii, :] = self.components[which_component].sample().samples
        # NOTE(review): which_component holds only the draw made for the last
        # row, and is unassigned when n == 0 (the return then fails) --
        # confirm callers only rely on n == 1.
        return SampleFromMixture(rez, which_component)
def __init__(self, dimension=2, num_components=2, components=None, mixing_proportion=None):
    """
    Initialises the mixture.

    dimension         - passed on to Distribution.__init__
    num_components    - number of mixture components
    components        - optional sequence with exactly num_components
                        entries; if not given, each component is
                        Gaussian(mu=zeros(self.dimension),
                        Sigma=eye(self.dimension))
    mixing_proportion - optional object with a ``num_objects`` attribute
                        equal to num_components; if not given, a Discrete
                        over the uniform weights 1/num_components is created

    An AssertionError is raised when a supplied argument does not match
    num_components.
    """
    Distribution.__init__(self, dimension)
    self.num_components = num_components

    # None is checked by identity; ``== None`` would compare element-wise
    # for array-like input and would defer to the argument's own __eq__.
    if components is None:
        self.components = [
            Gaussian(mu=zeros(self.dimension), Sigma=eye(self.dimension))
            for _ in range(self.num_components)
        ]
    else:
        assert len(components) == self.num_components
        self.components = components

    if mixing_proportion is None:
        uniform_weights = (1.0 / num_components) * ones([num_components])
        self.mixing_proportion = Discrete(uniform_weights)
    else:
        assert num_components == mixing_proportion.num_objects
        self.mixing_proportion = mixing_proportion
def construct_proposal(self, y):
    """
    Proposal is a mixture of ``self.num_eigen`` normals, each centred at the
    one-dimensional array ``y``. Component k has covariance

        gamma^2 * I + nu2 * M^T H a_k a_k^T H M

    where M = 2 * kernel.gradient(y, Z), H = Kernel.centring_matrix(len(Z))
    and a_k = eigvectors[:, k]. Mixing weights are
    (eigvalues + 1) / (sum(eigvalues) + num_eigen).
    """
    assert len(shape(y)) == 1

    proposal = MixtureDistribution(self.distribution.dimension, self.num_eigen)

    weights = (self.eigvalues + 1) / (sum(self.eigvalues) + self.num_eigen)
    proposal.mixing_proportion = Discrete(weights)

    M = 2 * self.kernel.gradient(y, self.Z)
    H = Kernel.centring_matrix(len(self.Z))

    for k in range(self.num_eigen):
        isotropic = self.gamma ** 2 * eye(len(y))
        a_k = self.eigvectors[:, k]
        # rank-one term a_k a_k^T, sandwiched between H M on both sides
        inner = outer(a_k, a_k).dot(H.dot(M))
        Sigma = isotropic + self.nu2 * (M.T).dot(H.dot(inner))
        proposal.components[k] = Gaussian(y, Sigma)

    return proposal
class MixtureDistribution(Distribution):
    """
    Distribution made up of several weighted component distributions.

    mixing_proportion is of class Distribution->Discrete
    components is a list of Distributions
    """

    def __init__(self, dimension=2, num_components=2, components=None, mixing_proportion=None):
        """
        dimension         - passed on to Distribution.__init__
        num_components    - number of mixture components
        components        - optional sequence with num_components entries;
                            when missing, zero-mean identity-covariance
                            Gaussians of size self.dimension are created
        mixing_proportion - optional object whose ``num_objects`` matches
                            num_components; when missing, a Discrete over
                            uniform weights 1/num_components is created

        An AssertionError is raised when a supplied argument does not match
        num_components.
        """
        Distribution.__init__(self, dimension)
        self.num_components = num_components

        # identity check for None; ``== None`` is element-wise on array-like
        # arguments and otherwise goes through the argument's __eq__
        if components is None:
            self.components = [
                Gaussian(mu=zeros(self.dimension), Sigma=eye(self.dimension))
                for _ in range(self.num_components)
            ]
        else:
            assert len(components) == self.num_components
            self.components = components

        if mixing_proportion is None:
            self.mixing_proportion = Discrete(
                (1.0 / num_components) * ones([num_components]))
        else:
            assert num_components == mixing_proportion.num_objects
            self.mixing_proportion = mixing_proportion

    def __str__(self):
        """
        Builds "<ClassName>=[components=..., mixing_proportion=..., <base str>]".
        """
        s = self.__class__.__name__ + "=["
        s += "components=" + str(self.components)
        s += ", mixing_proportion=" + str(self.mixing_proportion)
        s += ", " + Distribution.__str__(self)
        s += "]"
        return s

    def log_pdf(self, X, component_index_given=None):
        """
        If component_index_given is given, then just condition on it, i.e.
        return log_pdf(X) of that single component; otherwise compute the
        overall log_pdf of the mixture with one result per entry X[ii].

        An AssertionError is raised if component_index_given is not below
        num_components.
        """
        if component_index_given is None:
            rez = zeros([len(X)])
            for ii in range(len(X)):
                # evaluate point ii under each component separately
                logpdfs = zeros([self.num_components])
                for jj in range(self.num_components):
                    logpdfs[jj] = self.components[jj].log_pdf([X[ii]])
                # subtract the maximum before exp() and add it back after
                # log(); the weights come from mixing_proportion.omega
                lmax = max(logpdfs)
                rez[ii] = lmax + log(
                    sum(self.mixing_proportion.omega * exp(logpdfs - lmax)))
            return rez
        else:
            assert component_index_given < self.num_components
            return self.components[component_index_given].log_pdf(X)

    def sample(self, n=1):
        """
        Fills n rows, each by first drawing a component from
        mixing_proportion and then one sample from that component, and
        wraps them as SampleFromMixture(rows, which_component).
        """
        rez = zeros([n, self.dimension])
        for ii in range(n):
            which_component = self.mixing_proportion.sample().samples
            rez[ii, :] = self.components[which_component].sample().samples
        # NOTE(review): only the component drawn in the final iteration is
        # passed on, and the name is unbound when n == 0 so the return then
        # fails -- confirm that callers always use n == 1.
        return SampleFromMixture(rez, which_component)