class Server():
    """Coordinate federated training of a Gaussian mixture across clients.

    Each round the server samples a subset of clients, lets every sampled
    client fit the current global model, keeps the last parameters and
    metrics each client reports, and combines them into a new global model.
    """

    def __init__(self, args, init_dataset, clients, output_dir):
        """Build the initial global model and the per-round bookkeeping.

        Args:
            args: Run options. This class reads ``seed``, ``components``,
                ``init``, ``rounds``, ``C`` (fraction of clients used per
                round), ``K`` (total number of clients) and
                ``local_epochs``.
            init_dataset: Data passed to ``GaussianMixture`` as ``X``.
            clients: Sequence of client objects, indexed by position when
                a round is sampled.
            output_dir: Destination forwarded to the model when plotting.
        """
        # A seed of 0 is a valid seed; only a missing/empty option means
        # "unseeded".
        self.random_state = self._parse_seed(args.seed)
        self.model = GaussianMixture(X=init_dataset,
                                     n_components=args.components,
                                     random_state=self.random_state,
                                     is_quiet=True,
                                     init_params=args.init)

        self.init_dataset = init_dataset
        self.args = args
        self.rounds = args.rounds
        self.clients = clients
        self.fraction_clients = float(args.C)
        self.n_clients = int(args.K)
        self.n_clients_round = self._clients_per_round(self.fraction_clients,
                                                       self.n_clients)
        # Maps a round identifier to the clients sampled for that round.
        self.selected_clients = {}
        self.output_dir = output_dir
        self.metrics_history = {'aic': [], 'bic': [], 'll': []}

    @staticmethod
    def _parse_seed(seed):
        """Return ``seed`` as an ``int``, or ``None`` when no seed was given.

        ``None``, ``False`` and the empty string count as "no seed". Every
        other value, including the integer ``0``, is converted with ``int``.
        """
        if seed is None or seed is False or seed == '':
            return None
        return int(seed)

    @staticmethod
    def _clients_per_round(fraction, n_clients):
        """Return how many clients take part in each round.

        The truncated product ``fraction * n_clients`` is used, but never
        less than one client as long as there is at least one client: an
        empty round cannot be averaged.
        """
        per_round = int(fraction * n_clients)
        if per_round == 0 and n_clients > 0:
            return 1
        return per_round

    def _select_round_clients(self, round):
        """Sample the clients of one round and remember the selection.

        Sampling is done without replacement through NumPy's global random
        generator (``np.random.choice``).

        Args:
            round: Key under which the selection is stored in
                ``self.selected_clients``.

        Returns:
            List of the sampled client objects.
        """
        picked_idxs = np.random.choice(range(self.n_clients),
                                       self.n_clients_round,
                                       replace=False)
        selected_clients = [self.clients[idx] for idx in picked_idxs]
        self.selected_clients[round] = selected_clients

        return selected_clients

    def _set_parameters_from_clients_models(self, round_history):
        """Store the last means/covariances/weights reported by each client.

        Args:
            round_history: Mapping ``client id -> history`` where
                ``history['parameters']`` holds the sequences ``'means'``,
                ``'covariances'`` and ``'weights'``; only the last entry of
                each sequence is used.
        """
        reported = [round_history[client_id]['parameters']
                    for client_id in round_history]

        # One leading axis entry per client, in round_history order.
        self.clients_means = np.array(
            [parameters['means'][-1] for parameters in reported])
        self.clients_covariances = np.array(
            [parameters['covariances'][-1] for parameters in reported])
        self.clients_weights = np.array(
            [parameters['weights'][-1] for parameters in reported])

    def _set_metrics_from_clients_models(self, round_history):
        """Store the last AIC, BIC and log-likelihood reported by each client.

        Args:
            round_history: Mapping ``client id -> history`` where
                ``history['metrics']`` holds the sequences ``'aic'``,
                ``'bic'`` and ``'ll'``; only the last entry of each
                sequence is used.
        """
        reported = [round_history[client_id]['metrics']
                    for client_id in round_history]

        self.clients_aic = np.array([metrics['aic'][-1]
                                     for metrics in reported])
        self.clients_bic = np.array([metrics['bic'][-1]
                                     for metrics in reported])
        self.clients_ll = np.array([metrics['ll'][-1]
                                    for metrics in reported])

    def start_round(self, round):
        """Run one round: sample clients, fit them, collect their results.

        Args:
            round: Zero-based round index (shown as ``round + 1`` in the
                progress bar).
        """
        selected_clients = self._select_round_clients(round)

        round_history = {}

        pbar = tqdm(selected_clients)
        for client in pbar:
            pbar.set_description('Round: {}/{} | Client: {}'.format(
                round + 1, self.rounds, client.id))
            # Results are keyed by client id, so clients sharing an id
            # would overwrite each other.
            round_history[client.id] = client.fit(self.model,
                                                  self.args.local_epochs)

            # Switch the label once the last sampled client has finished.
            if pbar.iterable[-1] == client:
                pbar.set_description('Round: {}/{} | Completed'.format(
                    round + 1, self.rounds))

        self._set_parameters_from_clients_models(round_history)
        self._set_metrics_from_clients_models(round_history)

    def _make_distributions(self, means, covariances):
        """Build one ``NormalDistribution`` per mixture component."""
        return [
            NormalDistribution(means[component_idx],
                               covariances[component_idx])
            for component_idx in range(self.args.components)
        ]

    def _match_components(self, reference_distributions, client_idx):
        """Greedily pair a client's components with the reference ones.

        Reference components are visited in order; each takes the still
        unassigned client component with the smallest Hellinger distance.

        Returns:
            Array whose i-th entry is the client component assigned to
            reference component i.
        """
        n_components = self.args.components
        client_means = self.clients_means[client_idx]
        client_covariances = self.clients_covariances[client_idx]

        assigned = []
        for component_idx in range(n_components):
            reference = reference_distributions[component_idx]
            candidates = [
                target_idx for target_idx in range(n_components)
                if target_idx not in assigned
            ]
            distances = [
                get_hellinger_multivariate(
                    reference,
                    NormalDistribution(client_means[target_idx],
                                       client_covariances[target_idx]))
                for target_idx in candidates
            ]
            assigned.append(candidates[np.argmin(distances)])

        return np.array(assigned)

    def _blend_reference(self, reference_distributions, client_idx):
        """Merge the reference with an already re-ordered client.

        Means are combined with weight ``1/2`` each and covariances with
        weight ``(1/2) ** 2`` each.

        Returns:
            New list of reference distributions, one per component.
        """
        stacked_means = np.array([
            np.array([dist.means for dist in reference_distributions]),
            np.array(self.clients_means[client_idx]),
        ])
        stacked_covariances = np.array([
            np.array([dist.covariances for dist in reference_distributions]),
            np.array(self.clients_covariances[client_idx]),
        ])

        gamma = 1 / stacked_means.shape[0]

        avg_means = np.sum(stacked_means * gamma, axis=0)
        avg_covariances = np.sum(stacked_covariances * gamma ** 2, axis=0)

        return self._make_distributions(avg_means, avg_covariances)

    def _sort_clients_distributions(self, update_reference: bool = False):
        """Re-order every client's components to line up with a reference.

        The first client's components are the initial reference. Each
        following client has its means, covariances and weights permuted in
        place so that component i is the one matched to reference i.

        Args:
            update_reference: When exactly ``True``, the reference is
                blended with each client right after that client has been
                re-ordered; otherwise the first client stays the reference.
        """
        reference_distributions = self._make_distributions(
            self.clients_means[0], self.clients_covariances[0])

        for client_idx in range(1, self.n_clients_round):
            order = self._match_components(reference_distributions,
                                           client_idx)

            self.clients_means[client_idx] = self.clients_means[client_idx][
                order]
            self.clients_covariances[client_idx] = self.clients_covariances[
                client_idx][order]
            self.clients_weights[client_idx] = self.clients_weights[
                client_idx][order]

            # Identity check kept on purpose: only the literal True enables
            # the update, as before.
            if update_reference is True:
                reference_distributions = self._blend_reference(
                    reference_distributions, client_idx)

    def average_clients_models(self,
                               use_hellinger_distance: bool = True,
                               update_reference: bool = False):
        """Combine the clients' parameters and load them into the model.

        Args:
            use_hellinger_distance: When exactly ``True``, client
                components are aligned with ``_sort_clients_distributions``
                before being combined.
            update_reference: Forwarded to ``_sort_clients_distributions``.
        """
        if use_hellinger_distance is True:
            self._sort_clients_distributions(update_reference)

        gamma = 1 / self.n_clients_round  # weight for each client (the same)

        self.avg_clients_means = np.sum(self.clients_means * gamma, axis=0)
        # NOTE(review): covariances are scaled by gamma squared, not gamma,
        # so this is not a plain average - confirm this is intended.
        self.avg_clients_covariances = np.sum(self.clients_covariances *
                                              gamma ** 2,
                                              axis=0)
        self.avg_clients_weights = np.sum(self.clients_weights * gamma,
                                          axis=0)

        self.avg_clients_precisions_cholesky = (
            self.model.compute_precision_cholesky(
                self.avg_clients_covariances, self.model.covariance_type))

        params = (self.avg_clients_weights, self.avg_clients_means,
                  self.avg_clients_covariances,
                  self.avg_clients_precisions_cholesky)
        self.model.set_parameters(params)

        # Read back only after set_parameters has been applied.
        self.avg_clients_precisions = self.model.precisions_

    def update_server_model(self):
        """Replace the global model with one seeded by the averaged values.

        The model must be regenerated with the new average parameters. It
        cannot simply be updated (it might be initialized again with wrong
        parameters).
        """
        self.model = GaussianMixture(
            X=self.init_dataset,
            n_components=self.args.components,
            random_state=self.random_state,
            is_quiet=True,
            init_params=self.args.init,
            weights_init=self.avg_clients_weights,
            means_init=self.avg_clients_means,
            precisions_init=self.avg_clients_precisions)

    def average_clients_metrics(self):
        """Append the mean of the clients' latest metrics to the history."""
        latest_metrics = (
            ('aic', self.clients_aic),
            ('bic', self.clients_bic),
            ('ll', self.clients_ll),
        )
        for key, values in latest_metrics:
            self.metrics_history[key].append(np.mean(values))

    def plot(self, X, labels, round=None):
        """Delegate plotting of ``X``/``labels`` to the global model."""
        self.model.plot(X, labels, self.args, self.output_dir, 'round', round)

    def compute_init_metrics(self, X):
        """Append the current model's AIC, BIC and score on ``X``."""
        self.metrics_history['aic'].append(self.model.aic(X))
        self.metrics_history['bic'].append(self.model.bic(X))
        self.metrics_history['ll'].append(self.model.score(X))
예제 #2
0
    if args.seed: seed = (int(args.seed))

    # Prepare server --> init_dataset is given by 0.5% of the train_dataset randomly sampled
    # init_dataset_size = int(train_dataset.shape[0] * 0.005)
    # init_dataset = train_dataset[np.random.choice(train_dataset.shape[0], init_dataset_size, replace=False)]
    init_dataset = train_dataset

    model = GaussianMixture(X=init_dataset,
                            n_components=args.components,
                            random_state=seed,
                            init_params=args.init)

    init_metrics = {
        'aic': model.aic(train_dataset),
        'bic': model.bic(train_dataset),
        'll': model.score(train_dataset)
    }

    model.fit(train_dataset, args.epochs, train_dataset_labels, args,
              output_dir)

    predicted_labels = model.predict_proba(train_dataset).tolist()
    predicted_labels = np.array(predicted_labels)

    print('\nSaving images...')

    metrics = model.history_['metrics']

    for key in metrics:
        metrics[key].insert(0, init_metrics[key])