# Example #1
def test(file, max_n_components, n_classes):
    """Cluster a dataset with GaussianMixture for k = 1..max_n_components.

    For each component count the SSE and NMI are printed and collected,
    then both measures are plotted against k.
    """
    print('GaussianMixture for set: ' + file)

    # Load and split the dataset into features and ground-truth labels.
    X, y = utils.data_process(utils.dataset_reader(file))

    component_range = range(1, max_n_components + 1)
    sse_values = []
    nmi_values = []
    for k in component_range:
        model = GaussianMixture(n_components=k)
        model.fit(X)
        labels = model.predict(X)

        sse = utils.sum_of_squared_errors(X, labels, model.means_)
        nmi = utils.normalized_mutual_information(y, n_classes, labels, k)

        print('{0:2d} components, SSE: {1:.2f}, NMI: {2:.4f}'.format(
            k, sse, nmi))

        sse_values.append(sse)
        nmi_values.append(nmi)

    utils.plot_measure_vs_k('SSE', sse_values, component_range)
    utils.plot_measure_vs_k('NMI', nmi_values, component_range)
    def update_server_model(self):
        """Rebuild the server model from the averaged client parameters.

        The model must be regenerated with the new average parameters rather
        than updated in place: mutating the existing instance could leave it
        re-initialized with wrong parameters.
        """
        averaged_params = dict(
            weights_init=self.avg_clients_weights,
            means_init=self.avg_clients_means,
            precisions_init=self.avg_clients_precisions,
        )
        self.model = GaussianMixture(X=self.init_dataset,
                                     n_components=self.args.components,
                                     random_state=self.random_state,
                                     is_quiet=True,
                                     init_params=self.args.init,
                                     **averaged_params)
# Example #3
    def testPredictClasses(self):
        """
        Assert that torch.FloatTensor is handled correctly.
        """
        samples = torch.randn(4, 2)
        k = np.random.randint(1, 100)

        mixture = GaussianMixture(k, samples.size(1))
        mixture.fit(samples)
        predictions = mixture.predict(samples)

        # Class memberships should be a flat tensor of length n.
        expected_shape = torch.Tensor(samples.size(0)).size()
        self.assertEqual(expected_shape, predictions.size())
# Example #4
    def testPredictProbabilities(self):
        """
        Assert that torch.FloatTensor is handled correctly when returning class probabilities.
        """
        samples = torch.randn(4, 2)
        k = np.random.randint(1, 100)

        mixture = GaussianMixture(k, samples.size(1))
        mixture.fit(samples)

        # Per-sample class probabilities should come back with shape (n, k).
        probabilities = mixture.predict(samples, probs=True)
        expected_shape = torch.Tensor(samples.size(0), k).size()
        self.assertEqual(expected_shape, probabilities.size())
    def __init__(self, args, init_dataset, clients, output_dir):
        """Set up the federated server: initial GMM plus run bookkeeping.

        Builds the starting model on *init_dataset* and records the client
        pool, round count, and per-round selection parameters from *args*.
        """
        # Optional reproducibility seed; a falsy/absent seed leaves runs random.
        self.random_state = int(args.seed) if args.seed else None

        # Initial server-side model constructed over the shared init dataset.
        self.model = GaussianMixture(X=init_dataset,
                                     n_components=args.components,
                                     random_state=self.random_state,
                                     is_quiet=True,
                                     init_params=args.init)

        self.init_dataset = init_dataset
        self.args = args
        self.rounds = args.rounds
        self.clients = clients

        # C is the fraction of the K clients sampled each round.
        self.fraction_clients = float(args.C)
        self.n_clients = int(args.K)
        self.n_clients_round = int(self.fraction_clients * self.n_clients)
        self.selected_clients = {}

        self.output_dir = output_dir
        self.metrics_history = {'aic': [], 'bic': [], 'll': []}
# Example #6
def main():
    """Fit a two-component GMM to synthetic 2-D data and plot the result."""
    n, d = 300, 2
    half = n // 2

    # Start from standard-normal samples ..
    data = torch.Tensor(n, d).normal_()
    # .. then shift/scale each half so the two clusters come from
    # different, non-standard Gaussians.
    data[:half] = (data[:half] - 1) * sqrt(3)
    data[half:] = (data[half:] + 1) * sqrt(2)

    # Fit the mixture and recover the cluster assignments.
    model = GaussianMixture(2, d)
    model.fit(data)
    labels = model.predict(data)

    plot(data, labels)
# Example #7
    def testEmMatchesSkLearn(self):
        """
        Assert that log-probabilities (E-step) and parameter updates (M-step) approximately match those of sklearn.
        """
        d = 20
        n_components = np.random.randint(1, 100)

        # (n, k, d)
        x = torch.randn(40, 1, d)
        # (n, d)
        x_np = np.squeeze(x.data.numpy())

        # Shared diagonal-variance initialization so both models start identically.
        var_init = torch.ones(1, n_components, d) - .4

        model = GaussianMixture(n_components, d, var_init=var_init)
        model_sk = sklearn.mixture.GaussianMixture(
            n_components,
            covariance_type="diag",
            init_params="random",
            means_init=np.squeeze(model.mu.data.numpy()),
            precisions_init=np.squeeze(1. / np.sqrt(var_init.data.numpy())))

        model_sk._initialize_parameters(x_np, np.random.RandomState())
        log_prob_sk = model_sk._estimate_log_prob(x_np)
        log_prob = model._estimate_log_prob(x)

        # Test whether log-probabilities are approximately equal
        np.testing.assert_almost_equal(np.squeeze(log_prob.data.numpy()),
                                       log_prob_sk,
                                       decimal=2,
                                       verbose=True)

        _, log_resp_sk = model_sk._e_step(x_np)
        _, log_resp = model._e_step(x)

        # Test whether E-steps are approximately equal
        np.testing.assert_almost_equal(np.squeeze(log_resp.data.numpy()),
                                       log_resp_sk,
                                       decimal=0,
                                       verbose=True)

        model_sk._m_step(x_np, log_prob_sk)
        pi_sk = model_sk.weights_
        mu_sk = model_sk.means_
        # BUG FIX: this previously read model_sk.means_, so the torch model's
        # variances were compared against sklearn's *means*. With
        # covariance_type="diag", covariances_ holds the per-component
        # diagonal variances — the correct counterpart of `var`.
        var_sk = model_sk.covariances_

        pi, mu, var = model._m_step(x, log_prob)

        # Test whether pi ..
        np.testing.assert_almost_equal(np.squeeze(pi.data.numpy()),
                                       pi_sk,
                                       decimal=1,
                                       verbose=True)

        # .. mu ..
        np.testing.assert_almost_equal(np.squeeze(mu.data.numpy()),
                                       mu_sk,
                                       decimal=1,
                                       verbose=True)

        # .. and var are approximately equal
        np.testing.assert_almost_equal(np.squeeze(var.data.numpy()),
                                       var_sk,
                                       decimal=1,
                                       verbose=True)
# Example #8
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 14 19:53:43 2018

@author: Garrett
"""

import numpy as np

from kmeans import KMeans
from gmm import GaussianMixture

# Two well-separated 2-D clusters: one near x=2, the other near x=101.
X = np.array([[2, 2], [3, 4], [1, 0], [101, 2], [102, 4], [100, 0]])

# Hard clustering baseline with k-means.
kmeans = KMeans(n_clusters=2).fit(X)

# Soft clustering with a two-component Gaussian mixture; report the
# predicted labels and the fitted parameters.
gmm = GaussianMixture(n_components=2).fit(X)
print('gmm predict  ', gmm.predict(X))
print('gmm.means_  ', gmm.means_)
print('gmm.covariances_  ', gmm.covariances_)
print('gmm.n_iter', gmm.n_iter_)
# Example #9
    # NOTE(review): fragment of a training entry point — the enclosing `def`
    # line lies outside this chunk. Code kept byte-identical; comments only.
    train_dataset, train_dataset_labels, _ = get_dataset(args)

    print_configuration(args, train_dataset, False)
    save_configuration(args, train_dataset, output_dir, False)

    # Init the Gaussian Mixture Model.
    # Optional reproducibility seed; absent/falsy seed leaves runs random.
    seed = None
    if args.seed: seed = (int(args.seed))

    # Prepare server --> init_dataset is given by 0.5% of the train_dataset randomly sampled
    # init_dataset_size = int(train_dataset.shape[0] * 0.005)
    # init_dataset = train_dataset[np.random.choice(train_dataset.shape[0], init_dataset_size, replace=False)]
    # The subsampling above is left disabled: the full training set is used.
    init_dataset = train_dataset

    model = GaussianMixture(X=init_dataset,
                            n_components=args.components,
                            random_state=seed,
                            init_params=args.init)

    # Metrics of the freshly initialized (pre-fit) model, for later
    # comparison against post-training values.
    init_metrics = {
        'aic': model.aic(train_dataset),
        'bic': model.bic(train_dataset),
        'll': model.score(train_dataset)
    }

    model.fit(train_dataset, args.epochs, train_dataset_labels, args,
              output_dir)

    # predict_proba -> list -> ndarray round-trip; presumably intended to
    # force a plain numpy copy of the soft assignments — TODO confirm intent.
    predicted_labels = model.predict_proba(train_dataset).tolist()
    predicted_labels = np.array(predicted_labels)

    print('\nSaving images...')
# Example #10
def gmm(opt):
    """Build a GaussianMixture whose component count comes from *opt*."""
    n_components = opt.GMM_NUM_COMPONENTS
    return GaussianMixture(n_components)