Example n. 1
0
def fraction_of_datasets(datasets, fraction, attackers_idx=None):
    """Extract a fraction of samples from each worker's dataset and
    aggregate them into a single FLCustomDataset.

    Samples taken from workers listed in `attackers_idx` are poisoned by
    adding random integer noise before aggregation.

    Args:
        datasets (dict[Any, FLCustomDataset]): mapping worker id -> dataset.
        fraction (float): Fraction between 0.0 and 1.0 of each dataset to take.
        attackers_idx (list, optional): worker ids whose samples get noise
            added. Defaults to no attackers.

    Returns:
        FLCustomDataset: the aggregated (possibly partially poisoned) dataset.
    """
    # Avoid the mutable-default-argument pitfall (shared list across calls).
    attackers_idx = [] if attackers_idx is None else attackers_idx
    logging.info("Extracting {}% of users data (total: {}) to be sent to the server...".format(
        fraction * 100.0, int(fraction * len(datasets) * len(list(datasets.values())[0].targets))))
    images, labels = [], []
    for ww_id, dataset in datasets.items():
        # Random subset of this worker's sample indices.
        idx = torch.randperm(len(dataset.targets))[:int(fraction * len(dataset.targets))]
        if ww_id in attackers_idx:
            # Poisoning: add random noise in [0, 1024) per pixel, then
            # truncate back to uint8 — assumes 28x28 images (MNIST-like);
            # TODO(review): confirm image shape against dataset producer.
            images.append(
                (dataset.data[idx.tolist()] +
                np.random.randint(0, 1024, (len(idx), 28, 28))).byte()
            )
        else:
            images.append(dataset.data[idx.tolist()])
        labels.append(dataset.targets[idx.tolist()])
    aggregate_dataset = FLCustomDataset(
        torch.cat(images), torch.cat(labels),
        transform=transforms.Compose([
            transforms.ToTensor()])
    )
    logging.info("Extracted... Ok, The size of the extracted data: {}".format(
        aggregate_dataset.data.shape))
    return aggregate_dataset
Example n. 2
0
def get_server_mnist_dataset(dataset, workers_num, percentage):
    """Carve out a server-side MNIST dataset from a worker-partitioned one.

    Args:
        dataset (FLCustomDataset): full dataset to sample from.
        workers_num (int): Total number of workers.
        percentage (float): share of each worker's batch to keep, out of 100.

    Returns:
        FLCustomDataset: the server's slice of the data.
    """
    logging.info("Creating server MNIST data loader.")
    # A temporary DataLoader whose batch size equals one worker's share:
    # each batch then corresponds to one worker, and we keep only a
    # `percentage` of every batch for the server.
    per_worker = int(len(dataset) / workers_num)
    loader = get_dataloader(dataset, per_worker, shuffle=False, drop_last=True)

    kept_images = tensor([], dtype=float32).reshape(0, 1, 28, 28)
    kept_labels = tensor([], dtype=int64)

    for worker_no, (data, target) in enumerate(loader):
        take = floor(len(data) * (percentage / 100.0))
        kept_images = torch.cat((kept_images, data[:take]))
        kept_labels = torch.cat((kept_labels, target[:floor(len(target) * (percentage / 100.0))]))
        logging.debug("Taking {} out of {} from worker {}, Total: [{}]".format(
            take,
            len(data),
            worker_no,
            kept_labels.shape))

    return FLCustomDataset(kept_images, kept_labels)
Example n. 3
0
def split_randomly_dataset(dataset, shards_num):
    """Cut the dataset into `shards_num` equal-sized shards and yield
    them in a randomly shuffled order.

    Args:
        dataset (torch.dataset): dataset exposing `.data` and `.targets`.
        shards_num (int): Number of shards.

    Yields:
        FLCustomDataset: one shard per iteration, in shuffled order.
    """
    shard_size = int(len(dataset) / shards_num)
    logging.info(
        "Splitting the dataset into {} groups, each with {} samples...".format(shards_num, shard_size))
    order = list(range(shards_num))
    random.shuffle(order)
    data_chunks = torch.split(dataset.data, shard_size)
    target_chunks = torch.split(dataset.targets, shard_size)
    for pos in order:
        yield FLCustomDataset(
            data_chunks[pos],
            target_chunks[pos],
            transform=transforms.Compose([
                transforms.ToTensor()])
        )
Example n. 4
0
def merge_and_shuffle_dataset(datasets):
    """Concatenate several datasets into one FLCustomDataset and shuffle it."""
    all_images = torch.cat([ds.data for ds in datasets])
    all_labels = torch.cat([ds.targets for ds in datasets])
    merged = FLCustomDataset(
        all_images, all_labels,
        transform=transforms.Compose(
            [transforms.ToTensor()]))
    return shuffle_dataset(merged)
Example n. 5
0
def sort_mnist_dataset(dataset):
    """Return a new FLCustomDataset with samples reordered by ascending label.

    Args:
        dataset (torch.dataset): dataset exposing `.data` and `.targets`.

    Returns:
        FLCustomDataset: label-sorted copy of the input.
    """
    logging.info("Sorting the MNIST dataset based on labels...")
    order = sorted(range(len(dataset.targets)), key=lambda i: dataset.targets[i])
    return FLCustomDataset(
        dataset.data[order],
        dataset.targets[order],
        transform=transforms.Compose([transforms.ToTensor()]))
Example n. 6
0
def map_shards_to_worker(splitted_datasets, workers_idx, num_shards_per_worker):
    """Draw `num_shards_per_worker` shards per worker and yield each
    worker's merged dataset.

    Args:
        splitted_datasets (Iterator[FLCustomDataset]): generator of shards
            (e.g. the output of split_randomly_dataset).
        workers_idx (list): worker ids, consumed in order.
        num_shards_per_worker (int): number of shards assigned to each worker.

    Yields:
        dict: {worker_id: FLCustomDataset} with that worker's shards merged.
    """
    # NOTE: the original pairwise torch.cat loop rebound `images` to a tensor
    # on its first pass and then indexed into that tensor, which broke for
    # num_shards_per_worker > 2 (and shadowed the outer loop index). A single
    # torch.cat over the collected shard tensors merges them correctly for
    # any shard count, including 1. The unused `federated_datasets`
    # accumulator was dead code and has been removed.
    for ww_id in workers_idx:
        images, labels = [], []
        for _ in range(num_shards_per_worker):
            shard = next(splitted_datasets)
            images.append(shard.data)
            labels.append(shard.targets)
        yield {ww_id: FLCustomDataset(
            torch.cat(images), torch.cat(labels),
            transform=transforms.Compose([
                transforms.ToTensor()]))}
Example n. 7
0
    def create_femnist_dataset(self,
                               raw_data,
                               workers_idx,
                               shuffle=True,
                               drop_last=True):
        """Build one aggregated FEMNIST test dataset from several workers.

        Args:
            raw_data (dict of str): processed data keyed by user id, e.g.
                raw_data['f0_12345']['x'] (images) and raw_data['f0_12345']['y']
                (labels).
            workers_idx (list): user ids whose data is aggregated.
            shuffle, drop_last: unused in this method; kept for interface
                compatibility with callers.

        Returns:
            FLCustomDataset: aggregated dataset, normalized with the mean and
            std of the aggregated images.
        """
        logging.info("Creating 1 test dataset from {} workers".format(
            len(workers_idx)))
        server_images = np.array([], dtype=np.float32).reshape(-1, 28, 28)
        server_labels = np.array([], dtype=np.int64)

        for worker_id in workers_idx:
            images = np.array(raw_data[worker_id]['x'],
                              dtype=np.float32).reshape(-1, 28, 28)
            # BUG FIX: labels must be read from the 'y' field; the original
            # read 'x' (copy-paste error), producing garbage labels.
            labels = np.array(raw_data[worker_id]['y'], dtype=np.int64).ravel()
            server_images = np.concatenate((server_images, images))
            server_labels = np.concatenate((server_labels, labels))

        test_dataset = FLCustomDataset(server_images,
                                       server_labels,
                                       transform=transforms.Compose([
                                           transforms.ToTensor(),
                                           transforms.Normalize(
                                               (server_images.mean(), ),
                                               (server_images.std(), ))
                                       ]))

        return test_dataset