Example #1
0
    def test_attributes(self):
        """Check the constructor attributes and the validating setters.

        The model must expose the ``nstates``, ``ndim`` and ``cvtype`` it was
        built with, accept the fixture's ``weights``, ``means`` and
        ``covars``, and raise ``ValueError`` when given malformed values.
        """
        g = gmm.GMM(self.nstates, self.ndim, self.cvtype)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(g.nstates, self.nstates)
        self.assertEqual(g.ndim, self.ndim)
        self.assertEqual(g.cvtype, self.cvtype)

        wrong_shape = np.zeros((self.nstates - 2, self.ndim))

        g.weights = self.weights
        assert_array_almost_equal(g.weights, self.weights)
        # Doubled weights are expected to be rejected (presumably because
        # they no longer sum to one -- confirm against the setter).
        self.assertRaises(ValueError, setattr, g, 'weights',
                          2 * self.weights)
        self.assertRaises(ValueError, setattr, g, 'weights', [])
        self.assertRaises(ValueError, setattr, g, 'weights', wrong_shape)

        g.means = self.means
        assert_array_almost_equal(g.means, self.means)
        self.assertRaises(ValueError, setattr, g, 'means', [])
        self.assertRaises(ValueError, setattr, g, 'means', wrong_shape)

        g.covars = self.covars[self.cvtype]
        assert_array_almost_equal(g.covars, self.covars[self.cvtype])
        self.assertRaises(ValueError, setattr, g, 'covars', [])
        self.assertRaises(ValueError, setattr, g, 'covars', wrong_shape)
Example #2
0
    def get_single_posteriors(self):
        """ get_single_posteriors()

            Build one Gaussian mixture per data point (indexed over
            ``self.Nleaves``) describing its posterior. A component is
            contributed by every node on the path from the root down
            to that data point. The mixtures are stored, in index
            order, in ``self.post_GMMs``.

        """
        posteriors = []
        self.post_GMMs = posteriors

        for leaf_index in range(self.Nleaves):
            route = self.find_path(leaf_index)

            mixture = gmm.GMM()

            # the root is visited before the path is followed, so it
            # always contributes a component
            current = self.root_node
            self.add_node_posterior(current, mixture, recurse=False)

            # walk down the tree, adding each node that is reached
            for step in route:
                if step == "left":
                    current = current.left_child
                elif step == "right":
                    current = current.right_child

                self.add_node_posterior(current, mixture, recurse=False)

            mixture.normalise_weights()
            mixture.set_mean_covar()
            posteriors.append(mixture)
Example #3
0
    def get_individual_posterior(self, index):
        """ get_individual_posterior(index)

            Look up the posterior of a data point, as a Gaussian
            mixture, from the posteriors held by ``self.cluster_bhc``.
            Those posteriors are computed on first use if they have
            not been found yet.

            Parameters
            ----------
            index : int
                The index of the data point

            Returns
            -------
            post_GMM : gmm.GMM or None
                The entry of ``self.cluster_bhc.post_GMMs`` selected by
                ``self.assignments[index]``, or None when that
                assignment is negative.
        """
        # Check if posteriors have been found for cluster_bhc
        if self.cluster_bhc.post_GMMs is None:
            self.cluster_bhc.get_single_posteriors()

        # get required posterior dist; no new GMM is needed here because
        # an existing mixture is returned as-is
        assignment = self.assignments[index]
        if assignment >= 0:
            return self.cluster_bhc.post_GMMs[assignment]
        return None
Example #4
0
    def get_cavity_priors(self):
        """ get_cavity_priors()

            Build the 'cavity prior' of every datum, i.e. the prior
            implied for it by removing it from the clusters, as a
            Gaussian mixture. One mixture per row of ``self.data`` is
            stored in ``self.cavity_GMMs``.
        """
        # the node parameters are needed below; set them if missing
        if not self.params_set:
            self.set_params()

        self.cavity_GMMs = []

        for row in range(self.data.shape[0]):
            mixture = gmm.GMM()

            for level in range(len(self.assignments)):
                node_index = self.assignments[level][row]

                # only non-negative entries refer to a node
                if not (node_index >= 0):
                    continue

                node = self.nodes[level][node_index]

                # nodes without a log_rk (leaves) keep prev_wk unchanged
                weight = (node.prev_wk * math.exp(node.log_rk)
                          if node.log_rk is not None
                          else node.prev_wk)

                mu, sigma = node.data_model.cavity_prior(
                    self.data[row], self.data_uncerts[row], node.params)
                mixture.add_component(weight, mu, sigma)

                # everything below deals with bhc tree children
                if not (node.tree_terminated and node.nk > 1):
                    continue

                subtree = node.true_bhc

                # compute the sub-tree cavity priors on first use
                if subtree.cavity_GMMs is None:
                    subtree.get_cavity_priors()

                # locate this datum inside the sub-tree by matching rows
                same_row = np.equal(subtree.data, self.data[row]).all(1)
                sub_row = np.nonzero(same_row)[0][0]
                sub_mixture = subtree.cavity_GMMs[sub_row]

                # splice in every sub-tree component except the first,
                # rescaled by this node's prev_wk
                mixture.weights.extend(node.prev_wk *
                                       sub_mixture.weights[1:])
                mixture.means.extend(sub_mixture.means[1:])
                mixture.covars.extend(sub_mixture.covars[1:])
                mixture.K += sub_mixture.K - 1

            mixture.normalise_weights()
            mixture.set_mean_covar()
            self.cavity_GMMs.append(mixture)
def baltimore_gmm(data):
    """Fit a ``gmm.GMM(3)`` model to ``data`` and save two plots.

    The model density is evaluated on the grid returned by ``makegrid``
    and saved as ``baltimore_gmm.pdf``. A density threshold is then
    searched with ``opt.fmin`` and the mask of grid cells above that
    threshold is saved as ``gmmavoid.pdf``.

    Parameters
    ----------
    data : array-like
        Points passed to ``model.train`` and ``makegrid``.
        NOTE(review): presumably 2-D points, since the density is
        queried with ``[x, y]`` pairs -- confirm.
    """
    def fgmm(x):
        # Distance from 0.95 of the summed density above threshold ``x``,
        # scaled by .0005**2 (presumably the grid-cell area -- confirm).
        return abs(np.sum(gmmimmat[gmmimmat > x]) * .0005**2 - 0.95)

    model = gmm.GMM(3)
    model.train(data, random=False)

    X, Y = makegrid(data)
    gmmimmat = np.zeros(X.shape)

    # range (not the Python-2-only xrange) keeps this runnable on both
    # Python 2 and Python 3.
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            gmmimmat[i, j] = model.dgmm(np.array([X[i, j], Y[i, j]]))

    plt.jet()
    plt.imshow(gmmimmat, origin='lower')
    plt.ylim([0, X.shape[0]])
    plt.xlim([0, X.shape[1]])
    plt.savefig('baltimore_gmm.pdf')

    # fmin starts from 10 and returns an array; its first entry is the
    # threshold used for the mask.
    thresh = opt.fmin(fgmm, 10)[0]
    bools = gmmimmat > thresh
    mat = np.zeros(X.shape)
    mat += bools
    plt.imshow(mat, origin='lower')
    plt.ylim([0, X.shape[0]])
    plt.xlim([0, X.shape[1]])
    plt.savefig('gmmavoid.pdf')
    def contrast(self):
        """ re-run the gmm, one column at a time, on the sharpened clusters

            Every cluster in ``self.clustersList`` goes through
            ``difference`` then ``sharpening`` and the results are
            concatenated. For each column index ``k`` of that frame
            the data is passed through ``putZeros`` and ``mini``, a
            GMM is fitted, ``soustractMin`` is applied to its result,
            and the outcome is clusterised along that column.

            Returns a list with one clusterisation result per column.
        """
        sharpened = [self.sharpening(self.difference(cluster))
                     for cluster in self.clustersList]
        merged = pd.concat(sharpened)

        results = []

        for k in range(len(merged.columns)):
            zeroed = self.putZeros(merged, k)
            shifted, lowest = self.mini(zeroed, k)

            frame, centers = gmm.GMM(shifted, self.numberCluster).result()
            restored = self.soustractMin(frame, lowest, k)

            clusterer = clusterisation.Clusterisation(
                restored, centers,
                isContrast=True, dimension=restored.columns[k])
            results.append(clusterer.result())

        return results
Example #7
0
def main():
    """Denoise test image segments with a trained FoE model and report MSE.

    Loads the FoE model and the noisy / ground-truth images from fixed
    paths, runs the MAP estimator on the first ``numSamples`` segments of
    every image, optionally shows noisy / reconstructed / true segments
    side by side, prints the average mean squared error and pickles the
    per-segment errors to ``errorHistory.pckl``.
    """
    # load trained FoE model from file
    foeModelFilename = './output/3pn_pt4/model_3pn_final.foe'
    foeModel = foe.load(foeModelFilename)
    # load test images
    noisyDirectory = './data/noisyData/3pn'
    groundTruthDirectory = './data/groundTruthData'
    noisyImages = util.loadImagesAsSegments(noisyDirectory)
    trueImages = util.loadImagesAsSegments(groundTruthDirectory)
    showImageResults = True

    # for each test image
    # (named ``estimator`` so the builtin ``map`` is not shadowed)
    estimator = mapest.MAPEstimator(foeModel)
    errorHistory = []
    numSamples = 20
    segmentShape = (util.segmentHeight, util.segmentWidth)
    # zip (not the Python-2-only itertools.izip) works on Python 2 and 3
    for noisyImage, trueImage in zip(noisyImages, trueImages):
        # for each segment in image
        noisySegs = util.segmentImage(noisyImage)
        trueSegs = util.segmentImage(trueImage)
        for noisySeg, trueSeg in zip(noisySegs[:numSamples],
                                     trueSegs[:numSamples]):
            # initialize weights of GMM
            gmmModel = gmm.GMM(noisySeg)
            estimator.gmm = gmmModel

            # call maximization algorithm to produce denoised image
            denoisedSegment = estimator.estimate()

            # calculate accuracy of denoise: compare the *denoised*
            # segment with the ground truth (the noisy segment was used
            # here before, so the estimator never affected the score).
            # Both are flattened so differing array shapes cannot
            # broadcast into a wrong result.
            err = np.mean(
                (np.ravel(denoisedSegment) - np.ravel(trueSeg))**2)
            errorHistory.append(err)

            # show the denoised segment (if enabled)
            if showImageResults:
                plt.subplot(1, 3, 1)
                plt.imshow(noisySeg.reshape(segmentShape), cmap='gray')
                plt.title('Noisy Segment')
                plt.subplot(1, 3, 2)
                plt.imshow(denoisedSegment.reshape(segmentShape),
                           cmap='gray')
                plt.title('Reconstructed Segment')
                plt.subplot(1, 3, 3)
                plt.imshow(trueSeg.reshape(segmentShape), cmap='gray')
                plt.title('True Noise-Free Segment')
                plt.show()

    # save results
    avgError = np.mean(errorHistory)
    # single-argument print call: same output on Python 2 and Python 3
    print('Average Mean Squared Error:  {0}'.format(avgError))
    with open('errorHistory.pckl', 'wb') as fid:
        pickle.dump(errorHistory, fid)
Example #8
0
    def test_rvs(self, n=1000):
        """Check that ``rvs(n)`` returns an array of shape ``(n, ndim)``.

        Parameters
        ----------
        n : int
            Number of samples to draw from the model.
        """
        g = gmm.GMM(self.nstates, self.ndim, self.cvtype)
        # Make sure the means are far apart so posteriors.argmax()
        # picks the actual component used to generate the observations.
        g.means = 20 * self.means
        # covariance entries are floored at 0.1 before being assigned
        g.covars = np.maximum(self.covars[self.cvtype], 0.1)
        g.weights = self.weights

        samples = g.rvs(n)
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(samples.shape, (n, self.ndim))
Example #9
0
def main():
    """Load the saved means and variances, build a GMM and train it.

    ``means.npy`` and ``vars.npy`` are read from ``OUTPUT_PATH``; training
    is configured by the module-level ``LAYER``, ``DATA_PATH``,
    ``WEIGHTS_PATH``, ``SOLVER_PATH`` and ``LAYER_SIZES`` settings.
    """
    # Parenthesised single-argument print prints the same text on
    # Python 2 and is valid syntax on Python 3.
    print('Loading means and vars...')

    means = np.load(os.path.join(OUTPUT_PATH, 'means.npy'))
    vars_ = np.load(os.path.join(OUTPUT_PATH, 'vars.npy'))

    print('Building GMM...')
    g = gmm.GMM(means, vars_)

    g.train(LAYER, DATA_PATH, WEIGHTS_PATH, SOLVER_PATH, LAYER_SIZES[LAYER],
            num_iterations=100, batch_size=25, save_every=20)
Example #10
0
def main():
    """Run a stand-alone GMM, then a GMM coupled with a Markov model.

    Observations are read from ``data.txt`` and the reference classes
    from ``correct.txt``.
    """
    observation = srk.Observation(np.loadtxt("data.txt"))
    labels = np.loadtxt("correct.txt")

    # GMM on its own
    standalone = gmm.GMM(4, category=labels)
    standalone.connect(observation)
    standalone.update()

    # model combining a GMM with a Markov model
    mixture = gmm.GMM(4, category=labels)
    markov = mm.MarkovModel()

    mixture.connect(observation)
    markov.connect(mixture)

    # five alternating update rounds
    for _ in range(5):
        mixture.update()
        markov.update()
Example #11
0
def clusters_from_dataframe(df, ncluster):
    """
    determines clusters from a given dataframe
    input : df the dataframe from which to infer the clusters
    input : ncluster the number of clusters to separate
    output : a tuple (clusterisation result, fitted gmm object)
    """
    # Forward the requested cluster count to the GMM, as the other callers
    # of gmm.GMM(data, numberCluster) do; it was silently ignored before.
    mgmm = gmm.GMM(df, ncluster)
    clusters, centres = mgmm.result()
    mclusters = clusterisation.Clusterisation(clusters, centres)
    return mclusters.result(), mgmm
Example #12
0
def main():
    """Chain a VAE into a GMM and run five joint update rounds.

    Observations come from ``data.txt`` and the categories handed to the
    GMM from ``category.txt``. The round number is printed each time.
    """
    observation = srk.Observation(np.loadtxt("data.txt"))
    categories = np.loadtxt("category.txt")

    encoder = vae_model(18, epoch=200, batch_size=500)
    mixture = gmm.GMM(10, category=categories)

    # observation -> VAE -> GMM
    encoder.connect(observation)
    mixture.connect(encoder)

    for round_no in range(5):
        print(round_no)
        encoder.update()
        mixture.update()
Example #13
0
    def get_cavity_priors(self):
        """ get_cavity_priors()

            Find the 'cavity priors' for each data point as a Gaussian
            mixture, with each component in the mixture corresponding
            to a node that the data point appears in. The mixtures are
            stored, one per row of ``self.data``, in
            ``self.cavity_GMMs``.

        """
        # traverse tree setting params

        self.set_params(self.root_node)

        self.cavity_GMMs = []

        # get mixture models for each data point

        for it in range(self.data.shape[0]):
            path = self.find_path(it)

            # initialise a GMM
            cavity_GMM = gmm.GMM()

            # collect the root and every node reached along the path
            node = self.root_node
            nodes_on_path = [node]

            for direction in path:
                if direction == "left":
                    node = node.left_child
                elif direction == "right":
                    node = node.right_child

                nodes_on_path.append(node)

            for node in nodes_on_path:
                mu, sigma = node.data_model.cavity_prior(
                    self.data[it], self.data_uncerts[it], node.params)

                # The root now gets the same None check as every other
                # node, so a root that is itself a leaf no longer fails
                # in math.exp(None).
                if node.log_rk is not None:
                    weight = node.prev_wk * math.exp(node.log_rk)
                else:  # a leaf
                    weight = node.prev_wk

                cavity_GMM.add_component(weight, mu, sigma)

            cavity_GMM.normalise_weights()
            cavity_GMM.set_mean_covar()
            self.cavity_GMMs.append(cavity_GMM)
Example #14
0
    def get_global_posterior(self):
        """ get_global_posterior()

            Build the posterior implied by the clustering as a single
            Gaussian mixture, with components taken from the nodes of
            the clustering. The result is stored in
            ``self.global_GMM``.

        """
        # the node parameters are needed below; set them if missing
        if not self.params_set:
            self.set_params()

        mixture = gmm.GMM()
        self.global_GMM = mixture

        # visit every node, level by level
        for level in range(len(self.assignments)):

            for node in self.nodes[level].values():

                mu = node.params[0]
                sigma = node.params[1] + node.params[2]

                # nodes without a log_rk (leaves) keep prev_wk unchanged
                weight = (node.prev_wk * math.exp(node.log_rk)
                          if node.log_rk is not None
                          else node.prev_wk)

                # components without positive weight are left out
                if weight > 0:
                    mixture.add_component(weight, mu, sigma)

                # everything below deals with bhc tree children
                if not (node.tree_terminated and node.nk > 1):
                    continue

                subtree = node.true_bhc

                # compute the sub-tree posterior on first use
                if subtree.global_GMM is None:
                    subtree.get_global_posterior()

                nested = subtree.global_GMM

                # splice in every sub-tree component except the first,
                # rescaled by this node's prev_wk
                mixture.weights.extend(node.prev_wk * nested.weights[1:])
                mixture.means.extend(nested.means[1:])
                mixture.covars.extend(nested.covars[1:])
                mixture.K += nested.K - 1

        mixture.normalise_weights()
        mixture.set_mean_covar()
Example #15
0
def main():
    """Chain a VAE, a GMM and a Markov model and run five update rounds.

    Observations come from ``data.txt`` and the categories handed to the
    GMM from ``category.txt``. The round number is printed each time.
    """
    observation = srk.Observation(np.loadtxt("data.txt"))
    categories = np.loadtxt("category.txt")

    encoder = vae.VAE(18, itr=200, batch_size=500)
    mixture = gmm.GMM(10, category=categories)
    markov = mm.MarkovModel()

    # observation -> VAE -> GMM -> Markov model
    encoder.connect(observation)
    mixture.connect(encoder)
    markov.connect(mixture)

    for round_no in range(5):
        print(round_no)
        encoder.update()
        mixture.update()
        markov.update()
Example #16
0
    def test_eval(self):
        """Check the shapes and values returned by ``eval``.

        Five noisy observations are generated around each component mean;
        ``eval`` must return one log-likelihood per observation and
        posteriors that sum to one and peak on the generating component.
        """
        model = gmm.GMM(self.nstates, self.ndim, self.cvtype)
        # Spread the means widely so that the arg-max of the posteriors
        # identifies the component each observation was generated from.
        model.means = 20 * self.means
        model.covars = self.covars[self.cvtype]

        labels = np.repeat(range(self.nstates), 5)
        n_samples = len(labels)
        noise = np.random.randn(n_samples, self.ndim)
        samples = noise + model.means[labels]

        loglik, post = model.eval(samples)

        self.assertEqual(len(loglik), n_samples)
        self.assertEqual(post.shape, (n_samples, self.nstates))
        assert_array_almost_equal(post.sum(axis=1), np.ones(n_samples))
        assert_array_equal(post.argmax(axis=1), labels)
    def contrast(self):
        """ re-run the gmm on the sharpened clusters

            Every cluster in ``self.clustersList`` goes through
            ``difference`` then ``sharpening``; the results are
            concatenated, a GMM with ``self.numberCluster`` is fitted
            on the combined frame and its result is clusterised.

            Returns the clusterisation result.
        """
        sharpened = [self.sharpening(self.difference(cluster))
                     for cluster in self.clustersList]
        merged = pd.concat(sharpened)

        frame, centers = gmm.GMM(merged, self.numberCluster).result()
        clusterer = clusterisation.Clusterisation(
            frame, centers, isContrast=True)

        return clusterer.result()
Example #18
0
def main():
    """Chain a VAE, a GMM and an MLDA and run five update rounds.

    The VAE reads ``data1.txt``; the MLDA is connected to ``data2.txt``
    and to the GMM. ``category.txt`` supplies the categories given to
    both the GMM and the MLDA. The round number is printed each time.
    """
    first_obs = srk.Observation(np.loadtxt("data1.txt"))
    second_obs = srk.Observation(np.loadtxt("data2.txt"))
    categories = np.loadtxt("category.txt")

    encoder = vae_model(18, epoch=200, batch_size=500)
    mixture = gmm.GMM(10, category=categories)
    topics = mlda.MLDA(10, [200, 200], category=categories)

    # data1 -> VAE -> GMM, then (data2, GMM) -> MLDA
    encoder.connect(first_obs)
    mixture.connect(encoder)
    topics.connect(second_obs, mixture)

    for round_no in range(5):
        print(round_no)
        encoder.update()
        mixture.update()
        topics.update()
Example #19
0
def main():
    """Chain a VAE, a GMM and an NN model and run ten update rounds.

    The VAE reads ``data1.txt``; the NN model is connected to the GMM and
    to ``data2.txt``. ``category.txt`` supplies the categories given to
    the GMM. The round number is printed each time.
    """
    first_obs = srk.Observation(np.loadtxt("data1.txt"))
    second_obs = srk.Observation(np.loadtxt("data2.txt"))
    categories = np.loadtxt("category.txt")

    encoder = vae_model(10, epoch=200, batch_size=500)
    mixture = gmm.GMM(10, category=categories)
    network = NN_model(itr1=500, itr2=2000,
                       batch_size1=500, batch_size2=500)

    # data1 -> VAE -> GMM, then (GMM, data2) -> NN
    encoder.connect(first_obs)
    mixture.connect(encoder)
    network.connect(mixture, second_obs)

    for round_no in range(10):
        print(round_no)
        encoder.update()
        mixture.update()
        network.update()
Example #20
0
    def test_train(self, params='wmc'):
        """Check that training does not make the model worse.

        Observations are sampled from a model with known parameters, the
        model is re-initialised from the training sample and trained for
        20 iterations. The training log-likelihood may not drop by 1 or
        more between iterations, and the held-out log-likelihood after
        training must be at least its value after initialisation.

        Parameters
        ----------
        params : str
            Passed through to ``g.init`` and ``g.train``.
        """
        g = gmm.GMM(self.nstates, self.ndim, self.cvtype)
        g.weights = self.weights
        g.means = self.means
        g.covars = 20 * self.covars[self.cvtype]

        # Create a training and testing set by sampling from the same
        # distribution.
        train_obs = g.rvs(n=100)
        test_obs = g.rvs(n=2)

        g.init(train_obs, params=params, minit='points')
        init_testll = g.lpdf(test_obs).sum()

        trainll = g.train(train_obs, iter=20, params=params)
        # assertTrue replaces the deprecated assert_ alias, which was
        # removed from unittest in Python 3.12.
        self.assertTrue(np.all(np.diff(trainll) > -1))

        post_testll = g.lpdf(test_obs).sum()
        # assertGreaterEqual reports both values when the check fails
        self.assertGreaterEqual(post_testll, init_testll)
Example #21
0
    def get_global_posterior(self):
        """ get_global_posterior()

            Build the posterior implied by the clustering as a single
            Gaussian mixture by recursing over the tree from the root
            with ``add_node_posterior``. The mixture is stored in
            ``self.global_GMM``; ``self.global_posterior_preds`` is
            reset to a fresh list that is handed to the same
            traversal.

        """
        mixture = gmm.GMM()
        predictions = []

        self.global_GMM = mixture
        self.global_posterior_preds = predictions

        # a single recursive call starting at the root covers the tree
        self.add_node_posterior(self.root_node, mixture, predictions,
                                recurse=True)

        mixture.normalise_weights()
        mixture.set_mean_covar()
Example #22
0
 def train(self, data):
     """Build a ``gmm.GMM(data, 2)``, keep it as ``gmm_object``, train it."""
     model = gmm.GMM(data, 2)
     self.gmm_object = model
     model.train()
Example #23
0
    def get_individual_posterior(self, index):
        """ get_individual_posterior(index)

            Build the posterior of one data point as a Gaussian
            mixture, with a component for each node the data point is
            assigned to across the levels.

            Parameters
            ----------
            index : int
                The index of the data point

            Returns
            -------
            post_GMM : gmm.GMM
                A Gaussian mixture model description of the posterior
        """
        # the node parameters are needed below; set them if missing
        if not self.params_set:
            self.set_params()

        mixture = gmm.GMM()

        for level in range(len(self.assignments)):
            node_index = self.assignments[level][index]

            # only non-negative entries refer to a node
            if not (node_index >= 0):
                continue

            node = self.nodes[level][node_index]

            # nodes without a log_rk (leaves) keep prev_wk unchanged
            weight = (node.prev_wk * math.exp(node.log_rk)
                      if node.log_rk is not None
                      else node.prev_wk)

            mu = node.params[0]
            sigma = node.params[1] + node.params[2]

            mixture.add_component(weight, mu, sigma)

            # everything below deals with bhc tree children
            if not (node.tree_terminated and node.nk > 1):
                continue

            subtree = node.true_bhc

            # compute the sub-tree posteriors on first use
            if subtree.post_GMMs is None:
                subtree.get_single_posteriors()

            # locate this datum inside the sub-tree by matching rows
            same_row = np.equal(subtree.data, self.data[index]).all(1)
            sub_row = np.nonzero(same_row)[0][0]
            sub_mixture = subtree.post_GMMs[sub_row]

            # splice in every sub-tree component except the first,
            # rescaled by this node's prev_wk
            mixture.weights.extend(node.prev_wk * sub_mixture.weights[1:])
            mixture.means.extend(sub_mixture.means[1:])
            mixture.covars.extend(sub_mixture.covars[1:])
            mixture.K += sub_mixture.K - 1

        mixture.normalise_weights()
        mixture.set_mean_covar()

        return mixture
Example #24
0
    def test_bad_cvtype(self):
        """A fixture cvtype must construct; 'badcvtype' must raise."""
        # construction with the fixture's cvtype must not raise
        gmm.GMM(20, 1, self.cvtype)

        with self.assertRaises(ValueError):
            gmm.GMM(20, 1, 'badcvtype')
Example #25
0
    def get_single_posteriors(self):
        """ get_single_posteriors()

            Find the posteriors for each data point as a Gaussian
            mixture, with each component in the mixture corresponding
            to a node that the data point appears in. The mixtures are
            stored, one per row of ``self.data``, in
            ``self.post_GMMs``.

        """
        # traverse tree setting params

        self.set_params()

        self.post_GMMs = []

        # get mixture models for each data point

        for datum_it in range(self.data.shape[0]):
            # initialise a GMM
            post_GMM = gmm.GMM()

            for level_it in range(len(self.assignments)):
                node_it = self.assignments[level_it][datum_it]

                if node_it >= 0:
                    node = self.nodes[level_it][node_it]

                    # (a row lookup against node.data used to be done
                    # here; its result was never read, so it is gone)

                    if node.log_rk is not None:
                        weight = node.prev_wk * math.exp(node.log_rk)
                    else:  # leaf
                        weight = node.prev_wk
                    mu, sigma = node.data_model.single_posterior(
                        self.data[datum_it], self.data_uncerts[datum_it],
                        node.params)

                    post_GMM.add_component(weight, mu, sigma)

                    # deal with bhc tree children
                    if node.tree_terminated and node.nk > 1:

                        # check if single posteriors need finding
                        if node.true_bhc.post_GMMs is None:
                            node.true_bhc.get_single_posteriors()

                        # find index of datum in this tree
                        same_row = np.equal(node.true_bhc.data,
                                            self.data[datum_it]).all(1)
                        tree_it = np.nonzero(same_row)[0][0]
                        tree_GMM = node.true_bhc.post_GMMs[tree_it]

                        tree_prev_wk = node.prev_wk

                        # splice in every sub-tree component except the
                        # first, rescaled by this node's prev_wk
                        post_GMM.weights.extend(tree_prev_wk *
                                                tree_GMM.weights[1:])
                        post_GMM.means.extend(tree_GMM.means[1:])
                        post_GMM.covars.extend(tree_GMM.covars[1:])
                        # keep the component count in step with the
                        # lists extended above, as the sibling posterior
                        # and cavity-prior methods do
                        # NOTE(review): assumes GMM exposes ``K`` here
                        # as it does in those methods -- confirm.
                        post_GMM.K += tree_GMM.K - 1

            post_GMM.normalise_weights()
            post_GMM.set_mean_covar()
            self.post_GMMs.append(post_GMM)