Code Example #1
    def test_create_lda_partitions_imbalanced_not_set(self) -> None:
        """Test if Latent Dirichlet Allocation rejects imbalanced
        partitions."""
        # Prepare
        num_partitions = 3
        concentration = 1e-3

        # Execute
        with self.assertRaises(ValueError):
            create_lda_partitions(
                dataset=self.ds,
                num_partitions=num_partitions,
                concentration=concentration,
            )
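The test methods in these examples reference a shared unittest fixture (self.ds, self.num_classes, self.num_samples) that the excerpts do not show. A minimal sketch of a setUp that would provide it; the class name, sizes, and shapes are assumptions, not the project's actual fixture:

import unittest

import numpy as np


class LdaPartitionsTestCase(unittest.TestCase):
    def setUp(self) -> None:
        """Build a small, balanced (x, y) dataset for the tests (assumed shapes)."""
        self.num_classes = 10
        self.num_samples = 1000
        x = np.random.randn(self.num_samples, 28, 28)
        # Balanced labels: each class appears num_samples / num_classes times.
        y = np.tile(np.arange(self.num_classes), self.num_samples // self.num_classes)
        self.ds = (x, y)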
Code Example #2
    def test_create_lda_partitions_elements_wrong_list_concentration(
            self) -> None:
        """Test if partitions from Latent Dirichlet Allocation contain the same
        elements."""
        # Prepare
        num_partitions = 5
        concentration = (self.num_classes + 1) * [0.5]

        # Execute
        with self.assertRaises(ValueError):
            create_lda_partitions(
                dataset=self.ds,
                num_partitions=num_partitions,
                concentration=concentration,
            )
Code Example #3
File: dataset_utils.py  Project: sisco0/flower
import shutil
from pathlib import Path

import numpy as np
import torch

# Note: create_lda_partitions comes from Flower's dataset utilities and
# get_random_id_splits is a helper defined elsewhere in this project; only the
# standard-library/third-party imports the snippet needs are shown here.


def do_fl_partitioning(path_to_dataset,
                       pool_size,
                       alpha,
                       num_classes,
                       val_ratio=0.0):
    """Partition a Torchvision (e.g. CIFAR-10) dataset into pool_size client
    splits using LDA and save them to disk."""

    images, labels = torch.load(path_to_dataset)
    idx = np.array(range(len(images)))
    dataset = [idx, labels]
    partitions, _ = create_lda_partitions(dataset,
                                          num_partitions=pool_size,
                                          concentration=alpha,
                                          accept_imbalanced=True)

    # Show label distribution for first partition (purely informative)
    partition_zero = partitions[0][1]
    hist, _ = np.histogram(partition_zero, bins=list(range(num_classes + 1)))
    print(
        f"Class histogram for 0-th partition (alpha={alpha}, {num_classes} classes): {hist}"
    )

    # now save partitioned dataset to disk
    # first delete dir containing splits (if exists), then create it
    splits_dir = path_to_dataset.parent / "federated"
    if splits_dir.exists():
        shutil.rmtree(splits_dir)
    Path.mkdir(splits_dir, parents=True)

    for p in range(pool_size):

        labels = partitions[p][1]
        image_idx = partitions[p][0]
        imgs = images[image_idx]

        # create dir
        Path.mkdir(splits_dir / str(p))

        if val_ratio > 0.0:
            # split data according to val_ratio
            train_idx, val_idx = get_random_id_splits(len(labels), val_ratio)
            val_imgs = imgs[val_idx]
            val_labels = labels[val_idx]

            with open(splits_dir / str(p) / "val.pt", "wb") as f:
                torch.save([val_imgs, val_labels], f)

            # remaining images for training
            imgs = imgs[train_idx]
            labels = labels[train_idx]

        with open(splits_dir / str(p) / "train.pt", "wb") as f:
            torch.save([imgs, labels], f)

    return splits_dir
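A short usage sketch for do_fl_partitioning; the dataset path and argument values below are illustrative assumptions, and the input file is expected to be an [images, labels] pair saved with torch.save:

from pathlib import Path

# Hypothetical location of an [images, labels] tensor pair saved with torch.save.
path_to_dataset = Path("./data/cifar-10/training.pt")

fed_dir = do_fl_partitioning(
    path_to_dataset,
    pool_size=100,     # number of clients / partitions
    alpha=0.5,         # LDA concentration: lower values give more heterogeneous clients
    num_classes=10,
    val_ratio=0.1,     # 10% of each client's samples are written to val.pt
)
# Each client p now has fed_dir / str(p) / "train.pt" (and "val.pt" when val_ratio > 0).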
Code Example #4
    def test_create_lda_partitions_elements_list_concentration(self) -> None:
        """Test if partitions from Latent Dirichlet Allocation contain the same
        elements."""
        # Prepare
        num_partitions = 5
        concentration = self.num_classes * [0.5]

        # Execute
        partitions, _ = create_lda_partitions(dataset=self.ds,
                                              num_partitions=num_partitions,
                                              concentration=concentration)
        x_lda = np.concatenate([item[0] for item in partitions])
        y_lda = np.concatenate([item[1] for item in partitions])

        # Assert
        assert_identity(xy_0=self.ds, xy_1=(x_lda, y_lda))
Code Example #5
    def test_create_lda_partitions_alpha_near_zero(self) -> None:
        """Test if Latent Dirichlet Allocation partitions will give single
        class distribution when concentration is near zero (~1e-3)."""
        # Prepare
        num_partitions = 5
        concentration = 1e-3

        # Execute
        _, distributions = create_lda_partitions(dataset=self.ds,
                                                 num_partitions=num_partitions,
                                                 concentration=concentration)
        test_num_partitions, _ = distributions.shape

        # Assert
        for part in range(test_num_partitions):
            this_distribution = distributions[part]
            max_prob = np.max(this_distribution)
            assert max_prob > 0.5
Code Example #6
    def test_create_lda_partitions_imbalanced(self) -> None:
        """Test if Latent Dirichlet Allocation accepts imbalanced partitions if
        accept_imbalanced is set."""
        # Prepare
        num_partitions = 3
        concentration = 1e-3

        # Execute
        partitions, _ = create_lda_partitions(
            dataset=self.ds,
            num_partitions=num_partitions,
            concentration=concentration,
            accept_imbalanced=True,
        )
        numel_list = [x.shape[0] for (x, y) in partitions]
        total_samples = np.sum(numel_list)

        # Assert
        assert total_samples == self.num_samples
Code Example #7
    def test_create_lda_partitions_large_alpha(self) -> None:
        """Test if Latent Dirichlet Allocation partitions will give near
        uniform distribution when concentration is large(~1e5)."""
        # Prepare
        num_partitions = 5
        concentration = 1e5
        uniform = (1.0 / self.num_classes) * np.ones(
            (self.num_classes, ), dtype=np.float64)

        # Execute
        _, distributions = create_lda_partitions(dataset=self.ds,
                                                 num_partitions=num_partitions,
                                                 concentration=concentration)
        test_num_partitions, _ = distributions.shape

        # Assert
        for part in range(test_num_partitions):
            this_distribution = distributions[part]
            np.testing.assert_array_almost_equal(this_distribution,
                                                 uniform,
                                                 decimal=3)
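The two concentration regimes exercised above (near-zero in Code Example #5, large in Code Example #7) can be reproduced directly with NumPy's Dirichlet sampler; the following standalone illustration is not project code:

import numpy as np

rng = np.random.default_rng(0)
num_classes = 10

# Near-zero concentration: the probability mass of each draw collapses onto one class.
print(rng.dirichlet([1e-3] * num_classes))

# Large concentration: each draw is close to the uniform distribution 1 / num_classes.
print(rng.dirichlet([1e5] * num_classes))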
Code Example #8
    def test_create_lda_partitions_with_inf_alpha(self) -> None:
        """Test if partitions created with concentration=Inf will produce
        uniform partitions."""
        # Prepare
        num_partitions = 5
        concentration = float("inf")

        # Execute
        partitions, dirichlet_dist = create_lda_partitions(
            dataset=self.ds,
            num_partitions=num_partitions,
            concentration=concentration)
        x_lda = np.concatenate([item[0] for item in partitions])
        y_lda = np.concatenate([item[1] for item in partitions])

        # Assert
        np.testing.assert_array_equal(
            dirichlet_dist,
            1.0 / self.num_classes * np.ones(
                (num_partitions, self.num_classes), dtype=np.float32),
        )
        assert_identity(xy_0=self.ds, xy_1=(x_lda, y_lda))
Code Example #9
        'update_interval': 55,
        'ae_loss': 'mse',
        'cl_loss': 'kld'
    }

    outcomes = None

    # Dataset: build the full dataset, then take this client's local partition
    if DATASET == 'blobs':
        n_features = 30
        X = datasets.make_blobs(n_samples=N_SAMPLES,
                                n_features=n_features,
                                random_state=SEED,
                                centers=N_CLUSTERS)
        if USE_LDA:
            Y, _ = create_lda_partitions(dataset=X, num_partitions=N_CLIENTS)
        else:
            Y = create_partitions(unpartitioned_dataset=X,
                                  iid_fraction=0.5,
                                  num_partitions=N_CLIENTS)
        x, y = Y[CLIENT_ID]  # .copy()
        # dimensions of the autoencoder dense layers
        dims = [x.shape[-1], int(4 * n_features), N_CLUSTERS]
        del X, Y
    elif DATASET == 'moons':
        x_tot, y_tot = data_util.build_dataset(N_CLIENTS, N_SAMPLES, R_NOISE,
                                               SEED)
        x_train, y_train = data_util.get_client_dataset(
            args.client_id, N_CLIENTS, x_tot, y_tot)
        # dimensions of the autoencoder dense layers
        dims = [x_train.shape[-1], 8, 8, 32, N_CLUSTERS]