import logging

import numpy as np
import torch
from torchvision import transforms

# FLCustomDataset is assumed to be importable from elsewhere in this project.


def fraction_of_datasets(datasets, fraction, attackers_idx=None):
    """Extract a fraction of data from each dataset and return the
    aggregated data as a FLCustomDataset.

    Args:
        datasets (dict[FLCustomDataset]): Per-worker datasets, keyed by worker id.
        fraction (float): Fraction between 0.0 and 1.0.
        attackers_idx (list, optional): Worker ids whose extracted images are
            perturbed with additive random noise.

    Returns:
        FLCustomDataset: The aggregated extracted data.
    """
    attackers_idx = attackers_idx if attackers_idx is not None else []
    logging.info("Extracting {}% of users data (total: {}) to be sent to the server...".format(
        fraction * 100.0,
        int(fraction * len(datasets) * len(list(datasets.values())[0].targets))))
    images, labels = [], []
    for ww_id, dataset in datasets.items():
        # Randomly sample a fraction of this worker's examples.
        idx = torch.randperm(len(dataset.targets))[:int(fraction * len(dataset.targets))]
        if ww_id in attackers_idx:
            # Attackers submit images corrupted with additive uniform noise,
            # wrapped back into byte range.
            images.append(
                (dataset.data[idx.tolist()] + np.random.randint(0, 1024, (len(idx), 28, 28))).byte())
        else:
            images.append(dataset.data[idx.tolist()])
        labels.append(dataset.targets[idx.tolist()])
    aggregate_dataset = FLCustomDataset(
        torch.cat(images),
        torch.cat(labels),
        transform=transforms.Compose([transforms.ToTensor()]))
    logging.info("Extracted... Ok, the size of the extracted data: {}".format(
        aggregate_dataset.data.shape))
    return aggregate_dataset
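# Hedged usage sketch: fraction_of_datasets depends on FLCustomDataset, which
# is defined elsewhere in this project, so the core sampling + noise-attack
# step is demonstrated below on plain tensors instead. All names here are
# illustrative, not part of the original code.
import numpy as np
import torch

data = torch.randint(0, 256, (100, 28, 28), dtype=torch.uint8)  # fake worker images
targets = torch.randint(0, 10, (100,))                          # fake labels
fraction = 0.1

# Sample a random fraction of the worker's examples, as the function does.
idx = torch.randperm(len(targets))[:int(fraction * len(targets))]
clean = data[idx.tolist()]
# Attacker variant: additive uniform noise, wrapped back into byte range.
noisy = (data[idx.tolist()] + np.random.randint(0, 1024, (len(idx), 28, 28))).byte()
print(clean.shape, noisy.shape)  # torch.Size([10, 28, 28]) for both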
def create_label_mapping(datasets):
    """Map every unique label found across the datasets to a 1-based index."""
    labels = set()
    for _, ds in datasets.items():
        labels.update(ds["func"].unique())
    # Sort so the mapping is deterministic across runs (set order is not).
    return {label: idx for idx, label in enumerate(sorted(labels), start=1)}
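# Toy usage sketch for create_label_mapping. The .unique() call suggests each
# dataset value carries a pandas "func" column; the data below is made up.
import pandas as pd

toy_datasets = {
    "train": pd.DataFrame({"func": ["add", "sub", "mul"]}),
    "valid": pd.DataFrame({"func": ["add", "div"]}),
}
print(create_label_mapping(toy_datasets))
# {'add': 1, 'div': 2, 'mul': 3, 'sub': 4}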
def _create_split_schemes(self, datasets):
    """Wrap each dataset in a Subset restricted to its split indices, when available."""
    output = {}
    for transform_type, v1 in datasets.items():
        output[transform_type] = {}
        for split_name, v2 in v1.items():
            indices = v2.get_split_indices(split_name)
            if indices is not None:
                output[transform_type][split_name] = torch.utils.data.Subset(v2, indices)
            else:
                # No explicit indices for this split: use the full dataset.
                output[transform_type][split_name] = v2
    return {self.get_split_scheme_name(0): output}
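# Minimal sketch of the dataset interface _create_split_schemes expects: an
# object exposing get_split_indices(split_name). The class below is a made-up
# stand-in, only to show the Subset-or-passthrough branch in isolation.
import torch

class ToySplitDataset(torch.utils.data.Dataset):
    def __init__(self, n=10):
        self.data = torch.arange(n)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i]

    def get_split_indices(self, split_name):
        # First half for "train"; None means "use the whole dataset".
        return list(range(len(self) // 2)) if split_name == "train" else None

ds = ToySplitDataset()
indices = ds.get_split_indices("train")
subset = torch.utils.data.Subset(ds, indices) if indices is not None else ds
print(len(subset))  # 5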
def get_dataloaders(datasets, batch_size=None, shuffle=None):
    batch_size, shuffle = _parse_batch_size_shuffle(batch_size, shuffle)
    assert isinstance(
        datasets, dict), f"Expected datasets to be a dict but found {type(datasets)}"
    loaders = {
        phase: DataLoader(dataset,
                          batch_size=batch_size[phase],
                          shuffle=shuffle[phase])
        for phase, dataset in datasets.items()
    }
    return loaders
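# Usage sketch for get_dataloaders. _parse_batch_size_shuffle is not shown in
# this snippet; it is assumed to normalize its arguments into per-phase dicts
# shaped like the ones built by hand below.
import torch
from torch.utils.data import DataLoader, TensorDataset

phase_datasets = {
    "train": TensorDataset(torch.randn(64, 3), torch.randint(0, 2, (64,))),
    "val": TensorDataset(torch.randn(16, 3), torch.randint(0, 2, (16,))),
}
batch_size = {"train": 32, "val": 16}
shuffle = {"train": True, "val": False}
loaders = {
    phase: DataLoader(ds, batch_size=batch_size[phase], shuffle=shuffle[phase])
    for phase, ds in phase_datasets.items()
}
print({phase: len(dl) for phase, dl in loaders.items()})  # {'train': 2, 'val': 1}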
#%%
import logging

import torch

import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# %%
# Pass the detected device instead of hardcoding "cuda", so the script also
# runs on CPU-only machines.
dataset = models.GardenData(dat, device=device)
train_len = int(len(dataset) * 0.9)
d = torch.utils.data.random_split(
    dataset, [train_len, len(dataset) - train_len])
datasets = {'train': d[0], 'valid': d[1]}
dataloaders = {
    phase: torch.utils.data.DataLoader(dataset=ds, batch_size=32,
                                       num_workers=0, shuffle=True)
    for phase, ds in datasets.items()
}
dataset_sizes = {name: len(dl.dataset) for name, dl in dataloaders.items()}
logging.info("Dataset sizes:")
logging.info(dataset_sizes)

# depth = dataset.__getitem__(2)['depth_frame']

#%%
from torch import nn

model = models.ConvNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
def count_labels(datasets):
    """Count the unique labels found across all datasets."""
    labels = set()
    for _, ds in datasets.items():
        labels.update(ds["func"].unique())
    return len(labels)
def siam_vsone_train():
    r"""
    CommandLine:
        python -m clab.live.siam_train siam_vsone_train --dry
        python -m clab.live.siam_train siam_vsone_train
        python -m clab.live.siam_train siam_vsone_train --db PZ_Master1
        python -m clab.live.siam_train siam_vsone_train --db PZ_MTEST --dry
        python -m clab.live.siam_train siam_vsone_train --db PZ_MTEST
        python -m clab.live.siam_train siam_vsone_train --db RotanTurtles
        python -m clab.live.siam_train siam_vsone_train --db humpbacks_fb

    Example:
        >>> # DISABLE_DOCTEST
        >>> from clab.live.siam_train import *  # NOQA
        >>> siam_vsone_train()
    """
    dbname = ub.argval('--db', default='PZ_MTEST')
    # train_dataset, vali_dataset, test_dataset = ibeis_datasets('GZ_Master')
    dim = 512
    datasets = randomized_ibeis_dset(dbname, dim=dim)
    workdir = ub.ensuredir(
        os.path.expanduser('~/data/work/siam-ibeis2/' + dbname))
    print('workdir = {!r}'.format(workdir))

    # train_dataset, vali_dataset, test_dataset = att_faces_datasets()
    # workdir = os.path.expanduser('~/data/work/siam-att')

    for k, v in datasets.items():
        print('* len({}) = {}'.format(k, len(v)))

    batch_size = 6
    print('batch_size = {!r}'.format(batch_size))

    # class_weights = train_dataset.class_weights()
    # print('class_weights = {!r}'.format(class_weights))

    pretrained = 'resnet50'
    branch = getattr(torchvision.models, pretrained)(pretrained=True)
    model = models.SiameseLP(p=2, branch=branch, input_shape=(1, 3, dim, dim))
    print(model)

    hyper = hyperparams.HyperParams(
        criterion=(criterions.ContrastiveLoss, {
            'margin': 4,
            'weight': None,
        }),
        optimizer=(torch.optim.SGD, {
            'weight_decay': .0005,
            'momentum': 0.9,
            'nesterov': True,
        }),
        scheduler=('Exponential', {
            'gamma': 0.99,
            'base_lr': 0.001,
            'stepsize': 2,
        }),
        other={
            'n_classes': 2,
            'augment': datasets['train'].augment,
        })

    def custom_metrics(harn, output, label):
        from .torch import metrics
        metrics_dict = metrics._siamese_metrics(output, label,
                                                margin=harn.criterion.margin)
        return metrics_dict

    dry = ub.argflag('--dry')

    from .torch.sseg_train import directory_structure
    train_dpath, test_dpath = directory_structure(
        workdir, model.__class__.__name__, datasets,
        pretrained=pretrained,
        train_hyper_id=hyper.hyper_id(),
        suffix='_' + hyper.other_id())

    xpu = xpu_device.XPU.from_argv()
    harn = fit_harness.FitHarness(
        model=model, hyper=hyper, datasets=datasets, xpu=xpu,
        batch_size=batch_size,
        train_dpath=train_dpath,
        dry=dry,
    )
    harn.add_metric_hook(custom_metrics)
    harn.run()