def test_attributes(self):
    """Check the constructor attributes and the setter validation of GMM.

    A model is built from the fixture values; the test verifies that
    ``nstates``, ``ndim`` and ``cvtype`` are reported back unchanged, that
    valid ``weights`` / ``means`` / ``covars`` round-trip through their
    setters, and that the invalid values tried below raise ``ValueError``.
    """
    g = gmm.GMM(self.nstates, self.ndim, self.cvtype)
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(g.nstates, self.nstates)
    self.assertEqual(g.ndim, self.ndim)
    self.assertEqual(g.cvtype, self.cvtype)

    # --- weights ---
    g.weights = self.weights
    assert_array_almost_equal(g.weights, self.weights)
    # Scaled weights are expected to be rejected (presumably because they
    # no longer sum to one -- confirm against the GMM setter).
    self.assertRaises(ValueError, setattr, g, 'weights', 2 * self.weights)
    self.assertRaises(ValueError, setattr, g, 'weights', [])
    self.assertRaises(ValueError, setattr, g, 'weights',
                      np.zeros((self.nstates - 2, self.ndim)))

    # --- means ---
    g.means = self.means
    assert_array_almost_equal(g.means, self.means)
    self.assertRaises(ValueError, setattr, g, 'means', [])
    self.assertRaises(ValueError, setattr, g, 'means',
                      np.zeros((self.nstates - 2, self.ndim)))

    # --- covars ---
    g.covars = self.covars[self.cvtype]
    assert_array_almost_equal(g.covars, self.covars[self.cvtype])
    self.assertRaises(ValueError, setattr, g, 'covars', [])
    self.assertRaises(ValueError, setattr, g, 'covars',
                      np.zeros((self.nstates - 2, self.ndim)))
def get_single_posteriors(self):
    """
    get_single_posteriors()

    Find the posterior of every data point as a Gaussian mixture, with
    one mixture component per node that the data point appears in.

    The mixtures are stored, in leaf order, in ``self.post_GMMs``.
    """
    self.post_GMMs = []

    for leaf_index in range(self.Nleaves):
        # Sequence of "left"/"right" steps leading from the root to the leaf.
        steps = self.find_path(leaf_index)

        mixture = gmm.GMM()

        # The root always contributes a component.
        current = self.root_node
        self.add_node_posterior(current, mixture, recurse=False)

        # Walk down the tree, adding one component per visited node.
        for step in steps:
            if step == "left":
                current = current.left_child
            elif step == "right":
                current = current.right_child
            self.add_node_posterior(current, mixture, recurse=False)

        mixture.normalise_weights()
        mixture.set_mean_covar()
        self.post_GMMs.append(mixture)
def get_individual_posterior(self, index):
    """
    get_individual_posterior(index)

    Find the posterior for a data point as a Gaussian mixture, with each
    component in the mixture corresponding to a node that the data point
    appears in.

    The mixtures are owned by ``self.cluster_bhc``; they are computed
    lazily, the first time this method finds ``post_GMMs`` to be ``None``.

    Parameters
    ----------
    index : int
        The index of the data point

    Returns
    -------
    post_GMM : gmm.GMM or None
        A Gaussian mixture model description of the posterior, or
        ``None`` when ``self.assignments[index]`` is negative.
    """
    tree = self.cluster_bhc

    # Check if posteriors have been found for cluster_bhc
    if tree.post_GMMs is None:
        tree.get_single_posteriors()

    # A negative assignment means there is no posterior to return.
    # (No throw-away GMM is constructed here: the result always comes
    # from the tree's cached list.)
    assignment = self.assignments[index]
    if assignment >= 0:
        return tree.post_GMMs[assignment]
    return None
def get_cavity_priors(self):
    """
    get_cavity_priors()

    Get the 'cavity priors': the prior implied for each datum by removing
    it from the clusters.

    One Gaussian mixture per datum is appended to ``self.cavity_GMMs``.
    Every level at which the datum has a non-negative assignment
    contributes one component; nodes whose tree was terminated and that
    hold more than one point also contribute the components of their
    ``true_bhc`` sub-tree (all but its first one).
    """
    # traverse tree setting params
    if not self.params_set:
        self.set_params()

    self.cavity_GMMs = []

    for datum_idx in range(self.data.shape[0]):
        mixture = gmm.GMM()

        for level in range(len(self.assignments)):
            node_idx = self.assignments[level][datum_idx]
            if not node_idx >= 0:
                # datum not assigned to any node at this level
                continue

            node = self.nodes[level][node_idx]

            # Leaves carry no log_rk; their weight is prev_wk alone.
            if node.log_rk is None:
                weight = node.prev_wk
            else:
                weight = node.prev_wk * math.exp(node.log_rk)

            mu, sigma = node.data_model.cavity_prior(
                self.data[datum_idx],
                self.data_uncerts[datum_idx],
                node.params)
            mixture.add_component(weight, mu, sigma)

            # deal with bhc tree children
            if node.tree_terminated and node.nk > 1:
                subtree = node.true_bhc

                # compute the sub-tree cavity priors on first use
                if subtree.cavity_GMMs is None:
                    subtree.get_cavity_priors()

                # locate this datum among the sub-tree's rows
                row_matches = np.equal(subtree.data,
                                       self.data[datum_idx]).all(1)
                subtree_idx = np.nonzero(row_matches)[0][0]
                sub_mixture = subtree.cavity_GMMs[subtree_idx]

                # splice in every sub-tree component except the first
                scale = node.prev_wk
                mixture.weights.extend(scale * sub_mixture.weights[1:])
                mixture.means.extend(sub_mixture.means[1:])
                mixture.covars.extend(sub_mixture.covars[1:])
                mixture.K += sub_mixture.K - 1

        mixture.normalise_weights()
        mixture.set_mean_covar()
        self.cavity_GMMs.append(mixture)
def baltimore_gmm(data):
    """Fit a 3-component GMM to ``data`` and save two density plots.

    The fitted density is evaluated on the grid returned by
    ``makegrid(data)`` and written to ``baltimore_gmm.pdf``.  A threshold
    is then searched with ``opt.fmin`` and the grid cells whose density
    exceeds it are written, as a 0/1 mask, to ``gmmavoid.pdf``.
    """
    model = gmm.GMM(3)
    model.train(data, random=False)

    X, Y = makegrid(data)

    # Evaluate the mixture density at every grid point (row-major order).
    gmmimmat = np.zeros(X.shape)
    for row, col in np.ndindex(X.shape[0], X.shape[1]):
        point = np.array([X[row, col], Y[row, col]])
        gmmimmat[row, col] = model.dgmm(point)

    def fgmm(x):
        # Distance from 0.95 of the scaled density mass above level x.
        mass_above = np.sum(gmmimmat[gmmimmat > x])
        return abs(mass_above * .0005**2 - 0.95)

    n_rows, n_cols = X.shape[0], X.shape[1]

    plt.jet()
    plt.imshow(gmmimmat, origin='lower')
    plt.ylim([0, n_rows])
    plt.xlim([0, n_cols])
    plt.savefig('baltimore_gmm.pdf')

    thresh = opt.fmin(fgmm, 10)[0]

    # 0/1 mask of the cells above the threshold.
    mat = np.zeros(X.shape)
    mat += gmmimmat > thresh

    plt.imshow(mat, origin='lower')
    plt.ylim([0, n_rows])
    plt.xlim([0, n_cols])
    plt.savefig('gmmavoid.pdf')
def contrast(self):
    """
    Re-apply the GMM, one column at a time, on the sharpened clusters.

    Every cluster of ``self.clustersList`` is passed through
    ``difference`` and ``sharpening``; the results are concatenated and,
    for each column index ``k``, a new GMM followed by a
    ``Clusterisation`` is run.

    Returns a list with one ``Clusterisation.result()`` value per column.
    """
    sharpened = [self.sharpening(self.difference(cluster))
                 for cluster in self.clustersList]
    newDataFrame = pd.concat(sharpened)

    gmmList = []
    for k in range(len(newDataFrame.columns)):
        zeroed = self.putZeros(newDataFrame, k)
        shifted, minimum = self.mini(zeroed, k)

        fitted, centers = gmm.GMM(shifted, self.numberCluster).result()
        restored = self.soustractMin(fitted, minimum, k)

        clustering = clusterisation.Clusterisation(
            restored,
            centers,
            isContrast=True,
            dimension=restored.columns[k])
        gmmList.append(clustering.result())

    return gmmList
def main():
    """Denoise test image segments with a trained FoE model and report MSE.

    Loads the trained model and the noisy / ground-truth images, runs the
    MAP estimator on the first ``numSamples`` segments of every image,
    optionally displays noisy / reconstructed / true segments side by
    side, prints the average mean squared error and pickles the list of
    per-segment errors to ``errorHistory.pckl``.
    """
    # load trained FoE model from file
    foeModelFilename = './output/3pn_pt4/model_3pn_final.foe'
    foeModel = foe.load(foeModelFilename)

    # load test images
    noisyDirectory = './data/noisyData/3pn'
    groundTruthDirectory = './data/groundTruthData'
    noisyImages = util.loadImagesAsSegments(noisyDirectory)
    trueImages = util.loadImagesAsSegments(groundTruthDirectory)

    showImageResults = True

    # named `estimator` so the builtin `map` is not shadowed
    estimator = mapest.MAPEstimator(foeModel)
    errorHistory = []
    numSamples = 20

    # for each test image
    for noisyImage, trueImage in itertools.izip(noisyImages, trueImages):
        # for each segment in image
        noisySegs = util.segmentImage(noisyImage)
        trueSegs = util.segmentImage(trueImage)
        for noisySeg, trueSeg in itertools.izip(noisySegs[:numSamples],
                                                trueSegs[:numSamples]):
            # initialize weights of GMM
            gmmModel = gmm.GMM(noisySeg)
            estimator.gmm = gmmModel

            # call maximization algorithm to produce denoised image
            denoisedSegment = estimator.estimate()

            # Accuracy of the denoising: compare the *reconstruction* with
            # the ground truth.  (Comparing the noisy input with the truth
            # would only measure the noise level and ignore the estimator.)
            # Both are flattened so differing layouts cannot broadcast.
            residual = np.ravel(denoisedSegment) - np.ravel(trueSeg)
            err = np.mean(residual ** 2)
            errorHistory.append(err)

            # show the denoised segment (if enabled)
            if showImageResults:
                segmentShape = (util.segmentHeight, util.segmentWidth)

                plt.subplot(1, 3, 1)
                plt.imshow(noisySeg.reshape(segmentShape), cmap='gray')
                plt.title('Noisy Segment')

                plt.subplot(1, 3, 2)
                plt.imshow(denoisedSegment.reshape(segmentShape),
                           cmap='gray')
                plt.title('Reconstructed Segment')

                plt.subplot(1, 3, 3)
                plt.imshow(trueSeg.reshape(segmentShape), cmap='gray')
                plt.title('True Noise-Free Segment')

                plt.show()

    # save results
    avgError = np.mean(errorHistory)
    # Same text as `print 'Average Mean Squared Error: ', avgError`, written
    # so that it produces identical output as a statement or a function.
    print(' '.join(('Average Mean Squared Error: ', str(avgError))))
    with open('errorHistory.pckl', 'wb') as fid:
        pickle.dump(errorHistory, fid)
def test_rvs(self, n=1000):
    """Check that ``rvs(n)`` returns an array of shape ``(n, ndim)``.

    Parameters
    ----------
    n : int, optional
        Number of samples to draw (default 1000).
    """
    g = gmm.GMM(self.nstates, self.ndim, self.cvtype)
    # Spread the fixture means out and floor the covariances at 0.1
    # before sampling.
    g.means = 20 * self.means
    g.covars = np.maximum(self.covars[self.cvtype], 0.1)
    g.weights = self.weights

    samples = g.rvs(n)
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(samples.shape, (n, self.ndim))
def main():
    """Load the saved means/variances, build a GMM from them and train it.

    The arrays are read from ``means.npy`` and ``vars.npy`` under
    ``OUTPUT_PATH``; training settings come from the module-level
    constants referenced below.
    """
    # A parenthesised single string prints identically whether `print`
    # is a statement or a function.
    print('Loading means and vars...')
    means_file = os.path.join(OUTPUT_PATH, 'means.npy')
    means = np.load(means_file)
    vars_file = os.path.join(OUTPUT_PATH, 'vars.npy')
    vars_ = np.load(vars_file)

    print('Building GMM...')
    model = gmm.GMM(means, vars_)
    model.train(
        LAYER,
        DATA_PATH,
        WEIGHTS_PATH,
        SOLVER_PATH,
        LAYER_SIZES[LAYER],
        num_iterations=100,
        batch_size=25,
        save_every=20,
    )
def main():
    """Run a stand-alone GMM, then a GMM coupled with a Markov model.

    Observations are read from ``data.txt`` and the reference classes
    from ``correct.txt``.
    """
    obs = srk.Observation(np.loadtxt("data.txt"))
    correct_classes = np.loadtxt("correct.txt")

    # GMM on its own
    standalone = gmm.GMM(4, category=correct_classes)
    standalone.connect(obs)
    standalone.update()

    # Model combining the GMM with a Markov model
    mixture = gmm.GMM(4, category=correct_classes)
    markov = mm.MarkovModel()
    mixture.connect(obs)
    markov.connect(mixture)

    # alternate the updates of both modules five times
    for _ in range(5):
        for module in (mixture, markov):
            module.update()
def clusters_from_dataframe(df, ncluster):
    """Determine clusters from a given dataframe.

    Parameters
    ----------
    df :
        The dataframe from which the clusters are inferred.
    ncluster :
        The number of clusters to separate.  It is forwarded to the GMM
        as its second argument (previously it was accepted but ignored).

    Returns
    -------
    tuple
        ``(clusters, mgmm)``: the value returned by
        ``Clusterisation.result()`` and the fitted ``gmm.GMM`` object.
    """
    mgmm = gmm.GMM(df, ncluster)
    clusters, centres = mgmm.result()
    mclusters = clusterisation.Clusterisation(clusters, centres)
    return mclusters.result(), mgmm
def main():
    """Connect a VAE to a GMM and update both for five iterations.

    Observations are read from ``data.txt`` and the categories from
    ``category.txt``.
    """
    observations = srk.Observation(np.loadtxt("data.txt"))
    data_category = np.loadtxt("category.txt")

    vae1 = vae_model(18, epoch=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=data_category)

    # observation -> VAE -> GMM
    vae1.connect(observations)
    gmm1.connect(vae1)

    for iteration in range(5):
        print(iteration)
        for module in (vae1, gmm1):
            module.update()
def get_cavity_priors(self):
    """
    get_cavity_priors()

    Find the 'cavity prior' of every data point as a Gaussian mixture,
    with one mixture component per node that the data point appears in.

    The mixtures are stored, in data order, in ``self.cavity_GMMs``.
    """
    # traverse tree setting params
    self.set_params(self.root_node)

    self.cavity_GMMs = []

    for datum_idx in range(self.data.shape[0]):
        steps = self.find_path(datum_idx)

        mixture = gmm.GMM()

        # Root component: the root is expected to carry a log_rk value.
        current = self.root_node
        weight = current.prev_wk * math.exp(current.log_rk)
        mu, sigma = current.data_model.cavity_prior(
            self.data[datum_idx],
            self.data_uncerts[datum_idx],
            current.params)
        mixture.add_component(weight, mu, sigma)

        # One further component per node on the way down to the leaf.
        for step in steps:
            if step == "left":
                current = current.left_child
            elif step == "right":
                current = current.right_child

            mu, sigma = current.data_model.cavity_prior(
                self.data[datum_idx],
                self.data_uncerts[datum_idx],
                current.params)

            if current.log_rk is None:
                # a leaf
                weight = current.prev_wk
            else:
                weight = current.prev_wk * math.exp(current.log_rk)

            mixture.add_component(weight, mu, sigma)

        mixture.normalise_weights()
        mixture.set_mean_covar()
        self.cavity_GMMs.append(mixture)
def get_global_posterior(self):
    """
    get_global_posterior()

    Find the posterior implied by the clustering as a Gaussian mixture,
    with each component in the mixture corresponding to a node in the
    clustering.

    The mixture is stored in ``self.global_GMM``.  Nodes whose tree was
    terminated and that hold more than one point also contribute the
    components of their ``true_bhc`` sub-tree (all but its first one).
    """
    # traverse tree setting params
    if not self.params_set:
        self.set_params()

    mixture = self.global_GMM = gmm.GMM()

    for level in range(len(self.assignments)):
        for node in self.nodes[level].values():
            mu = node.params[0]
            sigma = node.params[1] + node.params[2]

            # Leaves carry no log_rk; their weight is prev_wk alone.
            if node.log_rk is None:
                weight = node.prev_wk
            else:
                weight = node.prev_wk * math.exp(node.log_rk)

            # components without positive weight are left out
            if weight > 0:
                mixture.add_component(weight, mu, sigma)

            # deal with bhc tree children
            if node.tree_terminated and node.nk > 1:
                subtree = node.true_bhc

                # compute the sub-tree posterior on first use
                if subtree.global_GMM is None:
                    subtree.get_global_posterior()

                # splice in every sub-tree component except the first
                mixture.weights.extend(
                    node.prev_wk * subtree.global_GMM.weights[1:])
                mixture.means.extend(subtree.global_GMM.means[1:])
                mixture.covars.extend(subtree.global_GMM.covars[1:])
                mixture.K += subtree.global_GMM.K - 1

    mixture.normalise_weights()
    mixture.set_mean_covar()
def main():
    """Chain a VAE, a GMM and a Markov model and update them five times.

    Observations are read from ``data.txt`` and the categories from
    ``category.txt``.
    """
    observations = srk.Observation(np.loadtxt("data.txt"))
    data_category = np.loadtxt("category.txt")

    vae1 = vae.VAE(18, itr=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=data_category)
    mm1 = mm.MarkovModel()

    # observation -> VAE -> GMM -> Markov model
    vae1.connect(observations)
    gmm1.connect(vae1)
    mm1.connect(gmm1)

    for iteration in range(5):
        print(iteration)
        for module in (vae1, gmm1, mm1):
            module.update()
def test_eval(self):
    """Check the shapes and consistency of what ``GMM.eval`` returns.

    Five observations are generated around the mean of each component;
    the test verifies the lengths of the returned values, that every
    posterior row sums to one and that the most probable component is
    the one each observation was generated from.
    """
    model = gmm.GMM(self.nstates, self.ndim, self.cvtype)
    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    model.means = 20 * self.means
    model.covars = self.covars[self.cvtype]

    # component index of each observation: 0,0,0,0,0,1,1,1,1,1,...
    component_of_obs = np.repeat(range(self.nstates), 5)
    n_obs = len(component_of_obs)
    noise = np.random.randn(n_obs, self.ndim)
    obs = noise + model.means[component_of_obs]

    loglik, posteriors = model.eval(obs)

    self.assertEqual(len(loglik), n_obs)
    self.assertEqual(posteriors.shape, (n_obs, self.nstates))
    assert_array_almost_equal(posteriors.sum(axis=1), np.ones(n_obs))
    assert_array_equal(posteriors.argmax(axis=1), component_of_obs)
def contrast(self):
    """
    Re-apply the GMM on the sharpened clusters.

    Every cluster of ``self.clustersList`` is passed through
    ``difference`` and ``sharpening``; the results are concatenated, a
    new GMM is run on them and its output is handed to a
    ``Clusterisation`` built with ``isContrast=True``.

    Returns the value of ``Clusterisation.result()``.
    """
    sharpened = [self.sharpening(self.difference(cluster))
                 for cluster in self.clustersList]
    merged = pd.concat(sharpened)

    fitted, centers = gmm.GMM(merged, self.numberCluster).result()

    clustering = clusterisation.Clusterisation(
        fitted, centers, isContrast=True)
    return clustering.result()
def main():
    """Chain a VAE, a GMM and an MLDA and update them five times.

    The VAE observes ``data1.txt``; the MLDA is connected to the
    observation read from ``data2.txt`` and to the GMM.  Categories are
    read from ``category.txt``.
    """
    first_obs = srk.Observation(np.loadtxt("data1.txt"))
    second_obs = srk.Observation(np.loadtxt("data2.txt"))
    data_category = np.loadtxt("category.txt")

    vae1 = vae_model(18, epoch=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=data_category)
    mlda1 = mlda.MLDA(10, [200, 200], category=data_category)

    # wiring of the modules
    vae1.connect(first_obs)
    gmm1.connect(vae1)
    mlda1.connect(second_obs, gmm1)

    for iteration in range(5):
        print(iteration)
        for module in (vae1, gmm1, mlda1):
            module.update()
def main():
    """Chain a VAE, a GMM and an NN model and update them ten times.

    The VAE observes ``data1.txt``; the NN model is connected to the GMM
    and to the observation read from ``data2.txt``.  Categories are read
    from ``category.txt``.
    """
    first_obs = srk.Observation(np.loadtxt("data1.txt"))
    second_obs = srk.Observation(np.loadtxt("data2.txt"))
    category = np.loadtxt("category.txt")

    vae1 = vae_model(10, epoch=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=category)
    nn1 = NN_model(itr1=500, itr2=2000, batch_size1=500, batch_size2=500)

    # wiring of the modules
    vae1.connect(first_obs)
    gmm1.connect(vae1)
    nn1.connect(gmm1, second_obs)

    for iteration in range(10):
        print(iteration)
        for module in (vae1, gmm1, nn1):
            module.update()
def test_train(self, params='wmc'):
    """Check that training does not make the model fit worse.

    A model with the fixture parameters (covariances scaled by 20) is
    used to draw a training and a test set.  After initialisation and 20
    training iterations the test verifies that the training
    log-likelihood never drops by 1 or more between iterations and that
    the test log-likelihood did not decrease.

    Parameters
    ----------
    params : str, optional
        Passed through to ``GMM.init`` and ``GMM.train`` (default
        ``'wmc'``).
    """
    g = gmm.GMM(self.nstates, self.ndim, self.cvtype)
    g.weights = self.weights
    g.means = self.means
    g.covars = 20 * self.covars[self.cvtype]

    # Create a training and testing set by sampling from the same
    # distribution.
    train_obs = g.rvs(n=100)
    test_obs = g.rvs(n=2)

    g.init(train_obs, params=params, minit='points')
    init_testll = g.lpdf(test_obs).sum()

    trainll = g.train(train_obs, iter=20, params=params)
    # assertTrue is used instead of the deprecated assert_ alias.
    self.assertTrue(np.all(np.diff(trainll) > -1),
                    'training log-likelihood dropped by 1 or more')

    post_testll = g.lpdf(test_obs).sum()
    self.assertTrue(post_testll >= init_testll,
                    'test log-likelihood decreased: %r -> %r'
                    % (init_testll, post_testll))
def get_global_posterior(self):
    """
    get_global_posterior()

    Find the posterior implied by the clustering as a Gaussian mixture,
    with each component in the mixture corresponding to a node in the
    clustering.

    The mixture is stored in ``self.global_GMM``; the list handed to
    ``add_node_posterior`` is kept in ``self.global_posterior_preds``.
    """
    mixture = self.global_GMM = gmm.GMM()
    predictions = self.global_posterior_preds = []

    # Traverse the whole tree, starting from the root.
    self.add_node_posterior(self.root_node,
                            mixture,
                            predictions,
                            recurse=True)

    mixture.normalise_weights()
    mixture.set_mean_covar()
def train(self, data):
    """Create a GMM from ``data`` with second argument 2 and train it.

    The model is kept in ``self.gmm_object``.
    """
    model = gmm.GMM(data, 2)
    self.gmm_object = model
    model.train()
def get_individual_posterior(self, index):
    """
    get_individual_posterior(index)

    Find the posterior for a data point as a Gaussian mixture, with each
    component in the mixture corresponding to a node that the data point
    appears in.

    Parameters
    ----------
    index : int
        The index of the data point

    Returns
    -------
    post_GMM : gmm.GMM
        A Gaussian mixture model description of the posterior
    """
    # traverse tree setting params
    if not self.params_set:
        self.set_params()

    mixture = gmm.GMM()

    for level in range(len(self.assignments)):
        node_idx = self.assignments[level][index]
        if not node_idx >= 0:
            # data point not assigned to any node at this level
            continue

        node = self.nodes[level][node_idx]

        # Leaves carry no log_rk; their weight is prev_wk alone.
        if node.log_rk is None:
            weight = node.prev_wk
        else:
            weight = node.prev_wk * math.exp(node.log_rk)

        mu = node.params[0]
        sigma = node.params[1] + node.params[2]
        mixture.add_component(weight, mu, sigma)

        # deal with bhc tree children
        if node.tree_terminated and node.nk > 1:
            subtree = node.true_bhc

            # compute the sub-tree posteriors on first use
            if subtree.post_GMMs is None:
                subtree.get_single_posteriors()

            # locate this data point among the sub-tree's rows
            row_matches = np.equal(subtree.data, self.data[index]).all(1)
            subtree_idx = np.nonzero(row_matches)[0][0]
            sub_mixture = subtree.post_GMMs[subtree_idx]

            # splice in every sub-tree component except the first
            scale = node.prev_wk
            mixture.weights.extend(scale * sub_mixture.weights[1:])
            mixture.means.extend(sub_mixture.means[1:])
            mixture.covars.extend(sub_mixture.covars[1:])
            mixture.K += sub_mixture.K - 1

    mixture.normalise_weights()
    mixture.set_mean_covar()

    return mixture
def test_bad_cvtype(self):
    """Check that only an unknown covariance type is rejected."""
    # The fixture's covariance type must construct without raising.
    gmm.GMM(20, 1, self.cvtype)

    # An unrecognised covariance type must raise ValueError.
    bad_args = (20, 1, 'badcvtype')
    self.assertRaises(ValueError, gmm.GMM, *bad_args)
def get_single_posteriors(self):
    """
    get_single_posteriors()

    Find the posteriors for each data point as a Gaussian mixture, with
    each component in the mixture corresponding to a node that the data
    point appears in.

    One mixture per datum is appended to ``self.post_GMMs``.  Nodes whose
    tree was terminated and that hold more than one point also contribute
    the components of their ``true_bhc`` sub-tree (all but its first one).
    """
    # traverse tree setting params
    self.set_params()

    self.post_GMMs = []

    # get mixture models for each data point
    for datum_it in range(self.data.shape[0]):
        # initialise a GMM
        post_GMM = gmm.GMM()

        for level_it in range(len(self.assignments)):
            node_it = self.assignments[level_it][datum_it]

            if node_it >= 0:
                node = self.nodes[level_it][node_it]

                # NOTE: no lookup of the datum inside node.data is done
                # here; its result was never used and it cost a full
                # array comparison per node.

                if node.log_rk is not None:
                    weight = node.prev_wk * math.exp(node.log_rk)
                else:
                    # leaf
                    weight = node.prev_wk

                mu, sigma = node.data_model.single_posterior(
                    self.data[datum_it],
                    self.data_uncerts[datum_it],
                    node.params)
                post_GMM.add_component(weight, mu, sigma)

                # deal with bhc tree children
                if node.tree_terminated and node.nk > 1:
                    # check if single posteriors need finding
                    if node.true_bhc.post_GMMs is None:
                        node.true_bhc.get_single_posteriors()

                    # find index of datum in this tree
                    tree_it = np.nonzero(
                        np.equal(node.true_bhc.data,
                                 self.data[datum_it]).all(1))[0][0]

                    tree_GMM = node.true_bhc.post_GMMs[tree_it]
                    tree_prev_wk = node.prev_wk

                    # splice in every sub-tree component except the first
                    post_GMM.weights.extend(
                        tree_prev_wk * tree_GMM.weights[1:])
                    post_GMM.means.extend(tree_GMM.means[1:])
                    post_GMM.covars.extend(tree_GMM.covars[1:])
                    # Keep the component count in step with the lists
                    # extended above, as the other sub-tree merges in
                    # this code base do.
                    post_GMM.K += tree_GMM.K - 1

        post_GMM.normalise_weights()
        post_GMM.set_mean_covar()

        self.post_GMMs.append(post_GMM)