def test(file, max_n_components, n_classes):
    """Evaluate GaussianMixture clustering for 1..max_n_components components.

    For each candidate component count the model is fitted on the dataset,
    SSE and NMI are printed, and both measures are plotted against k.

    Parameters
    ----------
    file : str
        Dataset identifier passed to ``utils.dataset_reader``.
    max_n_components : int
        Largest number of mixture components to try (inclusive).
    n_classes : int
        Number of ground-truth classes, needed by the NMI computation.
    """
    print('GaussianMixture for set: ' + file)

    dataset = utils.dataset_reader(file)

    X, y = utils.data_process(dataset)

    # Build the candidate range once instead of constructing it three times.
    component_range = range(1, max_n_components + 1)

    list_sse = []
    list_nmi = []
    for n_components in component_range:
        gmm = GaussianMixture(n_components=n_components)
        gmm.fit(X)

        y_hat = gmm.predict(X)
        sse = utils.sum_of_squared_errors(X, y_hat, gmm.means_)
        nmi = utils.normalized_mutual_information(y, n_classes, y_hat,
                                                  n_components)

        print('{0:2d} components, SSE: {1:.2f}, NMI: {2:.4f}'.format(
            n_components, sse, nmi))
        list_sse.append(sse)
        list_nmi.append(nmi)

    utils.plot_measure_vs_k('SSE', list_sse, component_range)
    utils.plot_measure_vs_k('NMI', list_nmi, component_range)
    def update_server_model(self):
        """Rebuild the global model from the averaged client parameters.

        The model is regenerated rather than mutated in place: re-running
        initialization on the existing instance could overwrite it with
        wrong parameters.
        """
        averaged_init = dict(
            X=self.init_dataset,
            n_components=self.args.components,
            random_state=self.random_state,
            is_quiet=True,
            init_params=self.args.init,
            weights_init=self.avg_clients_weights,
            means_init=self.avg_clients_means,
            precisions_init=self.avg_clients_precisions,
        )
        self.model = GaussianMixture(**averaged_init)
Exemple #3
0
    def testPredictClasses(self):
        """
        Assert that torch.FloatTensor is handled correctly.
        """
        samples = torch.randn(4, 2)
        k = np.random.randint(1, 100)

        gmm = GaussianMixture(k, samples.size(1))
        gmm.fit(samples)
        predictions = gmm.predict(samples)

        # Class memberships must come back as a flat tensor of length n.
        expected_shape = torch.Tensor(samples.size(0)).size()
        self.assertEqual(expected_shape, predictions.size())
Exemple #4
0
    def testPredictProbabilities(self):
        """
        Assert that torch.FloatTensor is handled correctly when returning class probabilities.
        """
        samples = torch.randn(4, 2)
        n_components = np.random.randint(1, 100)

        gmm = GaussianMixture(n_components, samples.size(1))
        gmm.fit(samples)

        # Probabilities must have one row per sample and one column per
        # mixture component, i.e. shape (n, k).
        probabilities = gmm.predict(samples, probs=True)
        expected_shape = torch.Tensor(samples.size(0), n_components).size()
        self.assertEqual(expected_shape, probabilities.size())
    def __init__(self, args, init_dataset, clients, output_dir):
        """Store run configuration and build the initial global GMM."""
        # A falsy seed (None, 0, '') leaves the random state unset.
        self.random_state = int(args.seed) if args.seed else None

        self.model = GaussianMixture(X=init_dataset,
                                     n_components=args.components,
                                     random_state=self.random_state,
                                     is_quiet=True,
                                     init_params=args.init)

        self.args = args
        self.init_dataset = init_dataset
        self.output_dir = output_dir
        self.clients = clients
        self.rounds = args.rounds

        # C = participating fraction per round, K = total number of clients.
        self.fraction_clients = float(args.C)
        self.n_clients = int(args.K)
        self.n_clients_round = int(self.fraction_clients * self.n_clients)

        self.selected_clients = {}
        self.metrics_history = {'aic': [], 'bic': [], 'll': []}
Exemple #6
0
def main():
    """Fit a 2-component GMM to shifted synthetic data and plot the result."""
    n_points, dim = 300, 2
    half = n_points // 2

    # Draw standard-normal samples ..
    samples = torch.Tensor(n_points, dim).normal_()
    # .. and shift/scale each half into a distinct non-standard Gaussian.
    samples[:half] -= 1
    samples[:half] *= sqrt(3)
    samples[half:] += 1
    samples[half:] *= sqrt(2)

    # Instantiate the Gaussian mixture and ..
    model = GaussianMixture(2, dim)
    model.fit(samples)
    # .. recover the labels of the data points as they were shifted.
    labels = model.predict(samples)

    plot(samples, labels)
class Server():
    """Federated GMM server.

    Holds the global GaussianMixture model, samples a subset of clients each
    round, collects their locally-fitted parameters, aligns mixture
    components across clients by Hellinger distance, averages them, and
    rebuilds the global model from the averaged parameters.
    """

    def __init__(self, args, init_dataset, clients, output_dir):
        """Initialize the global model and per-round bookkeeping.

        args: parsed options (components, init, seed, rounds, C, K, ...).
        init_dataset: data used to initialize the global GaussianMixture.
        clients: client objects exposing ``fit(model, epochs)``.
        output_dir: directory for plots / saved configuration.
        """
        self.random_state = None
        if args.seed: self.random_state = (int(args.seed))
        self.model = GaussianMixture(X=init_dataset,
                                     n_components=args.components,
                                     random_state=self.random_state,
                                     is_quiet=True,
                                     init_params=args.init)

        self.init_dataset = init_dataset
        self.args = args
        self.rounds = args.rounds
        self.clients = clients
        # C is the fraction of the K clients that participates each round.
        self.fraction_clients = float(args.C)
        self.n_clients = int(args.K)
        self.n_clients_round = int(self.fraction_clients * self.n_clients)
        self.selected_clients = {}
        self.output_dir = output_dir
        self.metrics_history = {'aic': [], 'bic': [], 'll': []}

    def _select_round_clients(self, round):
        """Sample ``n_clients_round`` distinct clients for this round."""
        idxs_round_clients = np.random.choice(range(self.n_clients),
                                              self.n_clients_round,
                                              replace=False)
        selected_clients = []
        for idx in idxs_round_clients:
            selected_clients.append(self.clients[idx])

        # Remember who participated, keyed by round number.
        self.selected_clients[round] = selected_clients

        return selected_clients

    def _set_parameters_from_clients_models(self, round_history):
        """Collect each client's final (last-epoch) GMM parameters."""
        self.clients_means = []
        self.clients_covariances = []
        self.clients_weights = []

        for client_id in round_history:
            parameters = round_history[client_id]['parameters']

            # [-1] -> parameters after the client's last local epoch.
            self.clients_means.append(parameters['means'][-1])
            self.clients_covariances.append(parameters['covariances'][-1])
            self.clients_weights.append(parameters['weights'][-1])

        self.clients_means = np.array(self.clients_means)
        self.clients_covariances = np.array(self.clients_covariances)
        self.clients_weights = np.array(self.clients_weights)

        return

    def _set_metrics_from_clients_models(self, round_history):
        """Collect each client's final (last-epoch) AIC/BIC/log-likelihood."""
        self.clients_aic = []
        self.clients_bic = []
        self.clients_ll = []

        for client_id in round_history:
            metrics = round_history[client_id]['metrics']

            self.clients_aic.append(metrics['aic'][-1])
            self.clients_bic.append(metrics['bic'][-1])
            self.clients_ll.append(metrics['ll'][-1])

        self.clients_aic = np.array(self.clients_aic)
        self.clients_bic = np.array(self.clients_bic)
        self.clients_ll = np.array(self.clients_ll)

        return

    def start_round(self, round):
        """Run one federated round: fit the current global model on each
        selected client and gather the resulting parameters and metrics."""
        selected_clients = self._select_round_clients(round)

        round_history = {}

        pbar = tqdm(selected_clients)
        for client in pbar:
            pbar.set_description('Round: {}/{} | Client: {}'.format(
                round + 1, self.rounds, client.id))
            round_history[client.id] = client.fit(self.model,
                                                  self.args.local_epochs)

            # Last client of the round: switch the progress-bar label.
            if pbar.iterable[-1] == client:
                pbar.set_description('Round: {}/{} | Completed'.format(
                    round + 1, self.rounds))

        self._set_parameters_from_clients_models(round_history)
        self._set_metrics_from_clients_models(round_history)

        return

    def _sort_clients_distributions(self, update_reference: bool = False):
        """Align mixture components across clients before averaging.

        Component order is arbitrary per client, so each client's components
        are greedily matched to a reference (initially client 0's components)
        by minimum Hellinger distance, then the client's parameter arrays are
        permuted into reference order. If ``update_reference`` is True, the
        reference becomes a running average over the clients matched so far.
        """
        reference_distributions = []

        for component_idx in range(self.args.components):
            client_idx = 0  # First client
            distribution = NormalDistribution(
                self.clients_means[client_idx][component_idx],
                self.clients_covariances[client_idx][component_idx])
            reference_distributions.append(distribution)

        for client_idx in range(1, self.n_clients_round):
            selected_components_idxs = []

            for component_idx in range(self.args.components):
                distances = []
                distances_idxs = []

                for target_component_idx in range(self.args.components):
                    # Each target component may be matched at most once.
                    if target_component_idx in selected_components_idxs:
                        pass
                    else:
                        means = self.clients_means[client_idx][
                            target_component_idx]
                        covariances = self.clients_covariances[client_idx][
                            target_component_idx]
                        target_distribution = NormalDistribution(
                            means, covariances)

                        distance = get_hellinger_multivariate(
                            reference_distributions[component_idx],
                            target_distribution)
                        distances.append(distance)
                        distances_idxs.append(target_component_idx)

                # Greedy choice: nearest still-unmatched component.
                min_idx = np.argmin(distances)
                selected_components_idxs.append(distances_idxs[min_idx])

            # Permute this client's parameters into reference order.
            selected_components_idxs = np.array(selected_components_idxs)
            self.clients_means[client_idx] = self.clients_means[client_idx][
                selected_components_idxs]
            self.clients_covariances[client_idx] = self.clients_covariances[
                client_idx][selected_components_idxs]
            self.clients_weights[client_idx] = self.clients_weights[
                client_idx][selected_components_idxs]

            if update_reference is True:
                avg_means = []
                reference_means = [
                    reference_distributions[component_idx].means
                    for component_idx in range(self.args.components)
                ]
                avg_means.append(np.array(reference_means))
                avg_means.append(np.array(self.clients_means[client_idx]))
                avg_means = np.array(avg_means)

                avg_covariances = []
                reference_covariances = [
                    reference_distributions[component_idx].covariances
                    for component_idx in range(self.args.components)
                ]
                avg_covariances.append(np.array(reference_covariances))
                avg_covariances.append(
                    np.array(self.clients_covariances[client_idx]))
                avg_covariances = np.array(avg_covariances)

                # Equal weight for reference and current client (gamma = 1/2).
                gamma = 1 / avg_means.shape[0]

                # NOTE(review): means are scaled by gamma but covariances by
                # gamma^2 (variance of an average of independent estimates) --
                # confirm this weighting is intended.
                avg_means = np.sum(avg_means * pow(gamma, 1), axis=0)
                avg_covariances = np.sum(avg_covariances * pow(gamma, 2),
                                         axis=0)

                reference_distributions = []
                for component_idx in range(self.args.components):
                    distribution = NormalDistribution(
                        avg_means[component_idx],
                        avg_covariances[component_idx])
                    reference_distributions.append(distribution)

        return

    def average_clients_models(self,
                               use_hellinger_distance: bool = True,
                               update_reference: bool = False):
        """Average the (aligned) client parameters into the global model."""

        if use_hellinger_distance is True:
            self._sort_clients_distributions(update_reference)

        gamma = 1 / self.n_clients_round  # weight for each client (the same)

        # NOTE(review): covariances are scaled by gamma^2 rather than gamma --
        # confirm this averaging rule (see _sort_clients_distributions).
        self.avg_clients_means = np.sum(self.clients_means * pow(gamma, 1),
                                        axis=0)
        self.avg_clients_covariances = np.sum(self.clients_covariances *
                                              pow(gamma, 2),
                                              axis=0)
        self.avg_clients_weights = np.sum(self.clients_weights * pow(gamma, 1),
                                          axis=0)

        self.avg_clients_precisions_cholesky = self.model.compute_precision_cholesky(
            self.avg_clients_covariances, self.model.covariance_type)

        # Push the averaged parameters into the current model so precisions_
        # gets recomputed from the Cholesky factors.
        params = (self.avg_clients_weights, self.avg_clients_means,
                  self.avg_clients_covariances,
                  self.avg_clients_precisions_cholesky)
        self.model.set_parameters(params)

        self.avg_clients_precisions = self.model.precisions_

        return

    def update_server_model(self):
        # The model must be regenerated with the new average parameters. It
        # cannot simply be updated (it might be initialized again with wrong
        # parameters).
        self.model = GaussianMixture(
            X=self.init_dataset,
            n_components=self.args.components,
            random_state=self.random_state,
            is_quiet=True,
            init_params=self.args.init,
            weights_init=self.avg_clients_weights,
            means_init=self.avg_clients_means,
            precisions_init=self.avg_clients_precisions)

        return

    def average_clients_metrics(self):
        """Append this round's mean AIC/BIC/log-likelihood to the history."""
        self.metrics_history['aic'].append(np.mean(self.clients_aic))
        self.metrics_history['bic'].append(np.mean(self.clients_bic))
        self.metrics_history['ll'].append(np.mean(self.clients_ll))

        return

    def plot(self, X, labels, round=None):
        """Delegate plotting of the current clustering to the model."""
        self.model.plot(X, labels, self.args, self.output_dir, 'round', round)

        return

    def compute_init_metrics(self, X):
        """Record AIC/BIC/log-likelihood of the freshly initialized model."""
        self.metrics_history['aic'].append(self.model.aic(X))
        self.metrics_history['bic'].append(self.model.bic(X))
        self.metrics_history['ll'].append(self.model.score(X))

        return
Exemple #8
0
    def testEmMatchesSkLearn(self):
        """
        Assert that log-probabilities (E-step) and parameter updates (M-step) approximately match those of sklearn.
        """
        d = 20
        n_components = np.random.randint(1, 100)

        # (n, k, d)
        x = torch.randn(40, 1, d)
        # (n, d)
        x_np = np.squeeze(x.data.numpy())

        var_init = torch.ones(1, n_components, d) - .4

        model = GaussianMixture(n_components, d, var_init=var_init)
        # Mirror the torch model's initialization in sklearn with diagonal
        # covariances.
        # NOTE(review): for covariance_type="diag" sklearn's precisions_init
        # is the inverse variance (1/var); 1/sqrt(var) is the Cholesky factor
        # instead -- confirm which one is intended here.
        model_sk = sklearn.mixture.GaussianMixture(
            n_components,
            covariance_type="diag",
            init_params="random",
            means_init=np.squeeze(model.mu.data.numpy()),
            precisions_init=np.squeeze(1. / np.sqrt(var_init.data.numpy())))

        model_sk._initialize_parameters(x_np, np.random.RandomState())
        log_prob_sk = model_sk._estimate_log_prob(x_np)
        log_prob = model._estimate_log_prob(x)

        # Test whether log-probabilities are approximately equal
        np.testing.assert_almost_equal(np.squeeze(log_prob.data.numpy()),
                                       log_prob_sk,
                                       decimal=2,
                                       verbose=True)

        _, log_resp_sk = model_sk._e_step(x_np)
        _, log_resp = model._e_step(x)

        # Test whether E-steps are approximately equal
        np.testing.assert_almost_equal(np.squeeze(log_resp.data.numpy()),
                                       log_resp_sk,
                                       decimal=0,
                                       verbose=True)

        model_sk._m_step(x_np, log_prob_sk)
        pi_sk = model_sk.weights_
        mu_sk = model_sk.means_
        # BUG FIX: compare variances against sklearn's covariances_, not a
        # second copy of means_ (with covariance_type="diag" the per-feature
        # variances are stored in covariances_).
        var_sk = model_sk.covariances_

        pi, mu, var = model._m_step(x, log_prob)

        # Test whether pi ..
        np.testing.assert_almost_equal(np.squeeze(pi.data.numpy()),
                                       pi_sk,
                                       decimal=1,
                                       verbose=True)

        # .. mu ..
        np.testing.assert_almost_equal(np.squeeze(mu.data.numpy()),
                                       mu_sk,
                                       decimal=1,
                                       verbose=True)

        # .. and var are approximately equal
        np.testing.assert_almost_equal(np.squeeze(var.data.numpy()),
                                       var_sk,
                                       decimal=1,
                                       verbose=True)
Exemple #9
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 14 19:53:43 2018

@author: Garrett
"""

from kmeans import KMeans
#from sklearn.cluster import KMeans

from gmm import GaussianMixture
import numpy as np

# Demo: cluster two well-separated blobs (around x~2 and x~101) with both
# KMeans and GaussianMixture and print the GMM's fitted parameters.
X = np.array([[2, 2], [3, 4], [1, 0], [101, 2], [102, 4], [100, 0]])
kmeans = KMeans(n_clusters=2).fit(X)
#print(kmeans.labels_)
#print(kmeans.predict(np.array([[0, 0], [4, 4]])))
#print(kmeans.cluster_centers_)

# Same data through a 2-component Gaussian mixture; the printed means and
# covariances should reflect the two blobs.
gmm = GaussianMixture(n_components=2).fit(X)
print('gmm predict  ', gmm.predict(X))
#print(gmm.predict(np.array([[0, 0], [4, 4]])))
print('gmm.means_  ', gmm.means_)
print('gmm.covariances_  ', gmm.covariances_)
print('gmm.n_iter', gmm.n_iter_)
Exemple #10
0
    # NOTE(review): fragment of a larger function -- its `def` line is outside
    # this chunk. It loads the training data, builds and fits a GMM, and
    # collects per-sample soft cluster assignments.
    train_dataset, train_dataset_labels, _ = get_dataset(args)

    print_configuration(args, train_dataset, False)
    save_configuration(args, train_dataset, output_dir, False)

    # Init the Gaussian Mixture Model
    # A falsy seed (None/0/'') leaves the model unseeded.
    seed = None
    if args.seed: seed = (int(args.seed))

    # Prepare server --> init_dataset is given by 0.5% of the train_dataset randomly sampled
    # init_dataset_size = int(train_dataset.shape[0] * 0.005)
    # init_dataset = train_dataset[np.random.choice(train_dataset.shape[0], init_dataset_size, replace=False)]
    init_dataset = train_dataset

    model = GaussianMixture(X=init_dataset,
                            n_components=args.components,
                            random_state=seed,
                            init_params=args.init)

    # Metrics of the freshly initialized (pre-training) model, for comparison
    # against post-fit values.
    init_metrics = {
        'aic': model.aic(train_dataset),
        'bic': model.bic(train_dataset),
        'll': model.score(train_dataset)
    }

    model.fit(train_dataset, args.epochs, train_dataset_labels, args,
              output_dir)

    # Soft (per-component probability) assignments for every training sample.
    predicted_labels = model.predict_proba(train_dataset).tolist()
    predicted_labels = np.array(predicted_labels)

    print('\nSaving images...')
Exemple #11
0
def gmm(opt):
    """Build a GaussianMixture whose size comes from the options object."""
    num_components = opt.GMM_NUM_COMPONENTS
    return GaussianMixture(num_components)