def create_samplers(self, hps):
    if not dist.is_available():
        self.train_sampler = BatchSampler(RandomSampler(self.train_dataset),
                                          batch_size=hps.bs, drop_last=True)
        self.test_sampler = BatchSampler(RandomSampler(self.test_dataset),
                                         batch_size=hps.bs, drop_last=True)
    else:
        self.train_sampler = DistributedSampler(self.train_dataset)
        self.test_sampler = DistributedSampler(self.test_dataset)
def get_train_val_loader(self):
    '''
    Raise an exception if not in training mode.

    :return mnist_cl_loader_train: mini-batch loader for the training set
    :return mnist_cl_loader_val: full-batch loader for the validation set
        (a single batch containing the whole validation set)
    '''
    if self.train:
        train_indices, val_indices = train_test_split(
            list(range(self.targets.size(0))),
            test_size=self.perc_val,
            shuffle=True,
            stratify=self.targets.numpy())
        train_sampler = BatchSampler(SubsetRandomSampler(train_indices),
                                     batch_size=self.batch_size, drop_last=True)
        # One batch holding every validation index.
        val_sampler = BatchSampler(SubsetRandomSampler(val_indices),
                                   batch_size=len(val_indices), drop_last=False)
        mnist_cl_loader_train = DataLoader(self, batch_sampler=train_sampler)
        mnist_cl_loader_val = DataLoader(self, batch_sampler=val_sampler)
        return mnist_cl_loader_train, mnist_cl_loader_val
    else:
        raise Exception(
            "Cannot split train and validation when test mode is on. "
            "The split is allowed only in train mode.")
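# A minimal, self-contained sketch of the full-batch-validation pattern used
# above, shown on a synthetic TensorDataset so it runs standalone. The names
# and sizes here (X, y, 100 samples, batch size 16) are illustrative
# assumptions, not part of the original class.
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import BatchSampler, DataLoader, SubsetRandomSampler, TensorDataset

X, y = torch.randn(100, 8), torch.randint(0, 2, (100,))
dataset = TensorDataset(X, y)
train_idx, val_idx = train_test_split(list(range(100)), test_size=0.2,
                                      shuffle=True, stratify=y.numpy())
train_loader = DataLoader(dataset, batch_sampler=BatchSampler(
    SubsetRandomSampler(train_idx), batch_size=16, drop_last=True))
val_loader = DataLoader(dataset, batch_sampler=BatchSampler(
    SubsetRandomSampler(val_idx), batch_size=len(val_idx), drop_last=False))
x_val, y_val = next(iter(val_loader))  # one batch with all 20 validation samples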
def test_dataset_loader_length(self, mnist_dataset):
    train_indices, valid_indices = mnist_dataset.get_train_and_validation_set_indices(
        train_valid_split_ratio=0.8, seed=2)
    train_loader = DatasetLoader(
        mnist_dataset,
        batch_sampler=BatchSampler(sampler=SubsetRandomSampler(train_indices),
                                   batch_size=50, drop_last=False))
    valid_loader = DatasetLoader(
        mnist_dataset,
        batch_sampler=BatchSampler(sampler=SubsetRandomSampler(valid_indices),
                                   batch_size=50, drop_last=False))
    # With a batch_sampler, len(loader) is the number of batches, not the
    # number of samples: 56000 / 50 = 1120 and 14000 / 50 = 280.
    assert (len(train_loader) == 1120) and (len(valid_loader) == 280)
def create_dataloaders(dataset: Dataset, indexes: dict, batch_size):
    train_idx = indexes.get('train', None)
    val_idx = indexes.get('val', None)
    test_idx = indexes.get('test', None)
    dataloaders = {}
    if train_idx:
        train_set = Subset(dataset, train_idx)
        # A BatchSampler must be passed via `batch_sampler`, and the loader
        # must read from the Subset the sampler's indices refer to.
        train_sampler = BatchSampler(SequentialSampler(train_set),
                                     batch_size=batch_size, drop_last=False)
        dataloaders['train'] = DataLoader(train_set, batch_sampler=train_sampler,
                                          num_workers=10, pin_memory=True)
    if val_idx:
        val_set = Subset(dataset, val_idx)
        val_sampler = BatchSampler(SequentialSampler(val_set),
                                   batch_size=batch_size, drop_last=False)
        dataloaders['val'] = DataLoader(val_set, batch_sampler=val_sampler,
                                        num_workers=10, pin_memory=True)
    if test_idx:
        test_set = Subset(dataset, test_idx)
        test_sampler = BatchSampler(SequentialSampler(test_set),
                                    batch_size=batch_size, drop_last=False)
        dataloaders['test'] = DataLoader(test_set, batch_sampler=test_sampler,
                                         num_workers=10, pin_memory=True)
    return dataloaders
def data_loaders(batch_size, shuffle=True, ratios=[0.6, 0.2, 0.2]):
    X, y = get_mitbih()
    X_train, X_testvalid, y_train, y_testvalid = train_test_split(
        X, y, train_size=ratios[0], shuffle=shuffle, stratify=y)
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_testvalid, y_testvalid, train_size=ratios[1] / (ratios[1] + ratios[2]))
    ds_train = MITBIHDataset(X_train, y_train)
    ds_valid = MITBIHDataset(X_valid, y_valid)
    ds_test = MITBIHDataset(X_test, y_test)
    sampler_train = BatchSampler(RandomSampler(ds_train), batch_size=batch_size, drop_last=False)
    sampler_valid = BatchSampler(RandomSampler(ds_valid), batch_size=batch_size, drop_last=False)
    sampler_test = BatchSampler(RandomSampler(ds_test), batch_size=batch_size, drop_last=False)
    # All three are BatchSamplers, so they go in via `batch_sampler`;
    # passing one as `sampler` would yield index lists instead of batches.
    dl_train = DataLoader(ds_train, batch_sampler=sampler_train, collate_fn=collate)
    dl_valid = DataLoader(ds_valid, batch_sampler=sampler_valid, collate_fn=collate)
    dl_test = DataLoader(ds_test, batch_sampler=sampler_test, collate_fn=collate)
    return (dl_train, dl_valid, dl_test), ds_train
def run_on_subset(self, subset: Dataset) -> Tuple[DTLoss, DTLoss]:
    """Train on the subset, validate, and report both losses."""
    trn_sampler = BatchSampler(RandomSampler(subset), self.batch_size, False)
    val_sampler = BatchSampler(RandomSampler(self.val_set), self.batch_size, False)

    # Train: each `minibatch` is a list of indices yielded by the sampler.
    self.model.train()
    trn_loss = DTLoss()
    for minibatch in trn_sampler:
        minibatch_loss = self._minibatch_loss(minibatch)
        self._optim.zero_grad()
        minibatch_loss.backward(self._loss_coefs)
        self._optim.step()
        trn_loss += minibatch_loss
        self.n_iters += len(minibatch)

    # Validate.
    self.model.eval()
    val_loss = DTLoss()
    with torch.no_grad():
        for minibatch in val_sampler:
            minibatch_loss = self._minibatch_loss(minibatch)
            val_loss += minibatch_loss
    return trn_loss, val_loss
def return_dataloader(self, dataset, batch_size, shuffle=False, num_workers=0):
    from torch.utils.data import BatchSampler, DataLoader, SequentialSampler, RandomSampler

    def collate(batch):
        # The BatchSampler is deliberately passed as `sampler`, so each
        # "sample" the loader fetches is a whole list of indices; the
        # dataset's list indexing returns the selected graphs, which arrive
        # here wrapped in a singleton list. Unwrap and merge into one Batch.
        return Batch.from_data_list(batch[0])

    if shuffle:
        sampler = BatchSampler(RandomSampler(dataset), batch_size=batch_size, drop_last=False)
    else:
        sampler = BatchSampler(SequentialSampler(dataset), batch_size=batch_size, drop_last=False)
    return DataLoader(dataset=dataset, collate_fn=collate, num_workers=num_workers,
                      pin_memory=True, sampler=sampler)
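# A self-contained sketch of the sampler trick used above (the toy dataset and
# names are illustrative assumptions): passing a BatchSampler as `sampler`
# makes the DataLoader treat each index *list* as a single sample, so the
# dataset must support list indexing and the collate_fn must unwrap one level.
import torch
from torch.utils.data import BatchSampler, DataLoader, SequentialSampler

class ListIndexableDataset:
    def __init__(self, n):
        self.data = torch.arange(n)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):  # idx may be an int or a list of ints
        return self.data[idx]

ds = ListIndexableDataset(10)
sampler = BatchSampler(SequentialSampler(ds), batch_size=4, drop_last=False)
loader = DataLoader(ds, sampler=sampler, collate_fn=lambda b: b[0])
for chunk in loader:
    print(chunk)  # tensor([0,1,2,3]), tensor([4,5,6,7]), tensor([8,9])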
def return_dataloader(self, batch_size, shuffle=False):
    from torch.utils.data import BatchSampler, DataLoader, SequentialSampler, RandomSampler

    def collate(batch):
        return Batch.from_data_list(batch[0])

    if shuffle:
        # Note: the index range comes from the train split here (and from the
        # test split below), while the loader itself reads from `self`.
        sampler = BatchSampler(RandomSampler(self.train()),
                               batch_size=batch_size, drop_last=False)
    else:
        sampler = BatchSampler(SequentialSampler(self.test(extra_targets='')),
                               batch_size=batch_size, drop_last=False)
    return DataLoader(dataset=self, collate_fn=collate, sampler=sampler)
def __init__(self, model, optimizer, expert_buffer, algo_params, aux_tasks=AuxiliaryTask()):
    """Basic behavioral cloning algorithm that minimizes the negative log likelihood."""
    self._optimizer = optimizer
    self.model = model
    self.expert_buffer = expert_buffer
    self.algo_params = algo_params
    self.step = 0

    self.device = algo_params.get(c.DEVICE, torch.device(c.CPU))
    self._opt_epochs = algo_params.get(c.OPT_EPOCHS, c.DEFAULT_BC_PARAMS[c.OPT_EPOCHS])
    self._opt_batch_size = algo_params.get(c.OPT_BATCH_SIZE, c.DEFAULT_BC_PARAMS[c.OPT_BATCH_SIZE])
    self._accum_num_grad = algo_params.get(c.ACCUM_NUM_GRAD, c.DEFAULT_BC_PARAMS[c.ACCUM_NUM_GRAD])
    self._overfit_tolerance = algo_params.get(c.OVERFIT_TOLERANCE, c.DEFAULT_BC_PARAMS[c.OVERFIT_TOLERANCE])
    self._aux_tasks = aux_tasks

    assert self._opt_batch_size % self._accum_num_grad == 0
    self._num_samples_per_accum = self._opt_batch_size // self._accum_num_grad

    self._max_grad_norm = algo_params.get(c.MAX_GRAD_NORM, c.DEFAULT_BC_PARAMS[c.MAX_GRAD_NORM])
    self.train_preprocessing = algo_params[c.TRAIN_PREPROCESSING]

    # Split the expert buffer into train/validation index sets.
    self._train_val_ratio = algo_params.get(c.VALIDATION_RATIO, c.DEFAULT_BC_PARAMS[c.VALIDATION_RATIO])
    self.num_val = int(len(self.expert_buffer) * self._train_val_ratio)
    self.num_train = len(self.expert_buffer) - self.num_val
    idxes = np.random.permutation(np.arange(len(self.expert_buffer)))
    self._val_sampler = BatchSampler(sampler=SubsetRandomSampler(idxes[self.num_train:]),
                                     batch_size=self._opt_batch_size, drop_last=False)
    self._train_sampler = BatchSampler(sampler=SubsetRandomSampler(idxes[:self.num_train]),
                                       batch_size=self._opt_batch_size, drop_last=False)

    # Early-stopping bookkeeping.
    self.best_validation_loss = np.inf
    self._overfit_count = 0
    self.overfitted = False
    self._curr_best_model = copy.deepcopy(self.model.state_dict())
def _init_train_loader(self, batch_size=0) -> DataLoader:
    """
    Create the DataLoader used for training.

    :param batch_size: mini-batch size; 0 means a single full batch
    :return: the training DataLoader
    """
    random.shuffle(self.sample_set)
    sampler = SubsetRandomSampler(self.sample_set)
    if batch_size:
        sampler = BatchSampler(sampler, batch_size, drop_last=False)
    else:
        # Full-batch mode: one batch containing every sample.
        sampler = BatchSampler(sampler, len(sampler), drop_last=False)
    return DataLoader(self.train_set, batch_sampler=sampler, num_workers=2)
def gen_data(t, x, n_t, n_col, col_bs, init_bs, bc_bs, device='cpu'):
    """
    Generates the dataloaders for the trainer.

    :param t: time domain limits ([t0, tf]).
    :param x: spatial coordinates.
    :param n_t: number of time domain points.
    :param n_col: number of collocation points.
    :param col_bs: collocation points batch size.
    :param init_bs: initial conditions batch size.
    :param bc_bs: boundary conditions batch size.
    :param device: device on which the tensors are created.
    :return: dataloaders (collocation points, initial conditions, boundary conditions).
    """
    # Initial conditions.
    idx_init = torch.arange(x.numel(), device=device).view(-1, 1)
    ds_init = BatchSampler(RandomSampler(range(x.numel()), replacement=False),
                           batch_size=init_bs, drop_last=True)
    dl_init = DataLoader(PINNDataset(x.view(-1, 1), idx_init, device=device),
                         sampler=ds_init, collate_fn=collate_pinn)

    # Boundary conditions at the lower and upper spatial bounds.
    t_bc = torch.linspace(t[0], t[-1], n_t, device=device).view(-1, 1)
    xlb = x[0] * torch.ones((n_t, 1), device=device, requires_grad=True)
    xub = x[-1] * torch.ones((n_t, 1), device=device, requires_grad=True)
    ds_bc = BatchSampler(RandomSampler(range(t_bc.numel()), replacement=False),
                         batch_size=bc_bs, drop_last=True)
    dl_bc = DataLoader(PINNDataset(t_bc, xlb, xub, device=device, gradient=True),
                       sampler=ds_bc, collate_fn=collate_pinn)

    # Collocation points on a (t, x) grid.
    t_col = torch.linspace(t[0], t[-1], n_col[0], device=device)
    x_col = torch.linspace(x[0], x[-1], n_col[1], device=device)
    t_col, x_col = torch.meshgrid([t_col, x_col])
    t_col = t_col.flatten().view(-1, 1)
    x_col = x_col.flatten().view(-1, 1)
    ds_col = BatchSampler(RandomSampler(range(x_col.numel()), replacement=False),
                          batch_size=col_bs, drop_last=True)
    dl_col = DataLoader(PINNDataset(t_col, x_col, device=device, gradient=True),
                        sampler=ds_col, collate_fn=collate_pinn)
    return dl_col, dl_init, dl_bc
def prepare_dataloader( config, data_config, train_ds, valid_ds = None ):
    """Construct torch DataLoader(s) based on the configuration specified when
    config.py and data_config.py were last run."""
    if data_config.include_valid_set:
        # DataLoader of latent vectors for validation:
        z_valid = gen_rand_latent_vars( num_samples = len( valid_ds ),
                                        length = config.len_latent,
                                        distribution = config.latent_distribution,
                                        device = 'cpu' )
        if config.class_condition or config.use_auxiliary_classifier:
            z_labels = torch.randint( 0, config.num_classes, ( len( z_valid ), 1, ),
                                      dtype = torch.int64, device = 'cpu' )
        if config.class_condition:
            z_valid = concat_rand_classes_to_z( z = z_valid, num_classes = config.num_classes,
                                                z_labels = z_labels, device = 'cpu' )
        if config.class_condition or config.use_auxiliary_classifier:
            z_labels.squeeze_()
            z_valid_ds = TensorDataset( z_valid, z_labels )
        else:
            z_valid_ds = TensorDataset( z_valid )

    # DataLoader(s) for training data:
    dataloader = configure_dataloader_for_hardware( num_workers = config.num_workers,
                                                    pin_memory = config.pin_memory )
    valid_dl = None
    z_valid_dl = None
    if config.model == 'ResNet GAN':
        train_dl = dataloader( dataset = train_ds, batch_size = config.batch_size,
                               shuffle = True, drop_last = True )
        if data_config.include_valid_set:
            valid_dl = dataloader( dataset = valid_ds, batch_size = config.batch_size,
                                   shuffle = False, drop_last = False )
            z_valid_dl = dataloader( dataset = z_valid_ds, batch_size = config.batch_size,
                                     shuffle = False, drop_last = False )
    elif config.model in ( 'ProGAN', 'StyleGAN', ):
        # Progressive models use an explicit BatchSampler so the batch size can
        # track the initial resolution.
        train_batch_sampler = BatchSampler( sampler = RandomSampler( data_source = train_ds ),
                                            batch_size = config.bs_dict[ config.init_res ],
                                            drop_last = True )
        train_dl = dataloader( dataset = train_ds, batch_sampler = train_batch_sampler )
        if data_config.include_valid_set:
            valid_batch_sampler = BatchSampler( sampler = SequentialSampler( data_source = valid_ds ),
                                                batch_size = config.bs_dict[ config.init_res ],
                                                drop_last = False )
            valid_dl = dataloader( dataset = valid_ds, batch_sampler = valid_batch_sampler )
            z_valid_batch_sampler = BatchSampler( sampler = SequentialSampler( data_source = z_valid_ds ),
                                                  batch_size = config.bs_dict[ config.init_res ],
                                                  drop_last = False )
            z_valid_dl = dataloader( dataset = z_valid_ds, batch_sampler = z_valid_batch_sampler )
    else:
        message = f'Model type {config.model} not supported, thus could not construct torch DataLoader.'
        raise ValueError( message )

    return train_dl, valid_dl, z_valid_dl
def init_loaders(self):
    # TODO: maybe lazy load for predict-only runs
    for name in self.config.datasets:
        dataset_config = AttrDefault(lambda: None, self.config.datasets[name])
        if self.config['predict_only_mode'] and not dataset_config.predicting:
            continue
        ds = self.dataset_manager.get_dataset(dataset_config)
        self.datasets[name] = ds
        shared_globals.logger.info("Initialized Dataset `" + name +
                                   "` with {} Samples".format(len(ds)))
        if dataset_config.batch_config.get("batch_sampler") == "stratified":
            shared_globals.logger.info("Initializing StratifiedBatchSampler for " + name)
            batch_sampler = StratifiedBatchSampler(
                ds, dataset_config.batch_config.batch_size, self.config.epochs)
        elif dataset_config.batch_config.get("batch_sampler") == "sequential":
            shared_globals.logger.info("Initializing SequentialSampler for " + name)
            sampler = SequentialSampler(ds)
            batch_sampler = BatchSampler(sampler, dataset_config.batch_config.batch_size, False)
        else:
            if dataset_config.testing or dataset_config.predicting:
                shared_globals.logger.info("Initializing SequentialSampler for " + name)
                sampler = SequentialSampler(ds)
            else:
                shared_globals.logger.info("Initializing RandomSampler for " + name)
                sampler = RandomSampler(ds)
            batch_sampler = BatchSampler(sampler, dataset_config.batch_config.batch_size, True)
        loader = torch.utils.data.DataLoader(
            ds,
            batch_sampler=batch_sampler,
            num_workers=dataset_config.num_of_workers,
            pin_memory=True,
            worker_init_fn=worker_init_fn,
            timeout=60)
        self.data_loaders[name] = loader
def generate_epoch_episodes_in_batches(self):
    '''
    Generates batches of frames in randomized order by sampling frames from
    multiple episodes.

    returns: 5-dimensional tensor (n, b, c, h, w) where n is the number of
        batches, b is the batch size, c is the number of channels of a frame,
        and h and w are the height and width of a frame
    '''
    # [[torch tensors (3, h, w)]]
    episodes = get_episodes(self.opt.game, self.opt.epoch_steps)
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler: draw `total_steps` episode indices with replacement,
    # batched `self.opt.batch_size` at a time.
    sampler = BatchSampler(RandomSampler(range(len(episodes)), replacement=True,
                                         num_samples=total_steps),
                           self.opt.batch_size, drop_last=True)
    all_batches = []
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t = []
        for episode in episodes_batch:
            # Take one random frame from this episode.
            t = np.random.randint(0, len(episode))
            x_t.append(episode[t])
        all_batches.append(torch.stack(x_t).float())
    return torch.stack(all_batches)
def create_sampler(self, dataset: data.Dataset, batch_sampler: bool,
                   profile: Profile, shared: Storage, logger: Logger):
    assert batch_sampler
    return BatchSampler(RandomSampler(dataset),
                        batch_size=profile.batch_size,
                        drop_last=profile.drop_last)
def __init__(self, base_dset, batch_size):
    super(CoherencyPairBatchWrapper, self).__init__()
    assert isinstance(base_dset, CoherencyPairDataSet)
    self.base = base_dset
    self.batch_size = batch_size
    # Materialize the batches of sequential indices up front.
    self.sampler = list(BatchSampler(SequentialSampler(self.base),
                                     batch_size=self.batch_size, drop_last=False))
def get_problem(rank, world_size, batch_size, device, model_name: str):
    # Select the desired model on the fly
    logging.info(f"Using {model_name} for benchmarking")
    try:
        model = getattr(importlib.import_module("torchvision.models"), model_name)(pretrained=False).to(device)
    except AttributeError:
        model = getattr(importlib.import_module("timm.models"), model_name)(pretrained=False).to(device)

    # Data setup; duplicate the grey channel to get pseudo colour
    def collate(inputs: List[Any]):
        return {
            "inputs": torch.stack([i[0] for i in inputs]).repeat(1, 3, 1, 1).to(device),
            "label": torch.tensor([i[1] for i in inputs]).to(device),
        }

    # Transforms
    transforms = []
    if model_name.startswith("vit"):
        # ViT models are fixed size. Add an ad-hoc transform to resize the pictures accordingly
        pic_size = int(model_name.split("_")[-1])
        transforms.append(Resize(pic_size))
    transforms.append(ToTensor())

    dataset = MNIST(transform=Compose(transforms), download=False, root=TEMPDIR)
    sampler: Sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
    batch_sampler = BatchSampler(sampler, batch_size, drop_last=True)
    dataloader = DataLoader(dataset=dataset, batch_sampler=batch_sampler, collate_fn=collate)
    loss_fn = nn.CrossEntropyLoss()
    return model, dataloader, loss_fn
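# A hedged, standalone sketch of the epoch-reshuffle contract that applies when
# a DistributedSampler feeds a BatchSampler, as above (the toy dataset and
# sizes are assumptions): DistributedSampler shuffles deterministically from
# its internal epoch counter, so set_epoch() must be called each epoch or
# every epoch sees the same order.
import torch
from torch.utils.data import BatchSampler, DistributedSampler

dataset = list(range(8))                      # toy dataset
sampler = DistributedSampler(dataset, num_replicas=2, rank=0, shuffle=True)
for epoch in range(2):
    sampler.set_epoch(epoch)                  # without this, the order repeats
    batches = list(BatchSampler(sampler, batch_size=2, drop_last=True))
    print(epoch, batches)                     # this rank's half, reshuffled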
def fit(self, train_x1, train_x2, train_x3, test_x1, test_x2, test_x3):
    data_size = train_x1.size(0)
    train_losses = []
    for epoch in range(self.epoch_num):
        self.arch.train()
        batch_idxs = list(BatchSampler(RandomSampler(range(data_size)),
                                       batch_size=self.batch_size, drop_last=False))
        for x in batch_idxs:
            self.optimizer.zero_grad()
            batch_x1 = train_x1[x, :]
            batch_x2 = train_x2[x, :]
            batch_x3 = train_x3[x, :]
            o1, o2, o3 = self.arch(batch_x1, batch_x2, batch_x3)
            loss = self.loss_function(o1, o2, o3)
            train_losses.append(loss.item())
            loss.backward()
            self.optimizer.step()
    # Fit the GCCA model on the learned representations.
    _, outputs = self._get_outputs(train_x1, train_x2, train_x3)
    GCCA_obj = GCCA(self.out_size, method="gcca")
    GCCA_obj.fit(outputs[0], outputs[1], outputs[2], params=None)  # from cca-zoo
    loss = self.test(test_x1, test_x2, test_x3)
    print("Fitted model to data")
def __init__(self,
             dataset: Any,
             max_repeats: int,
             batch_size: int = 1,
             shuffle: bool = False,
             use_imbalanced_sampler: bool = False,
             drop_last: bool = False,
             **kwargs: Any):
    """
    Creates a new data loader.

    :param dataset: The dataset that should be loaded.
    :param max_repeats: The maximum number of times the batch sampler is repeated.
    :param batch_size: The number of samples per minibatch.
    :param shuffle: If true, the dataset will be shuffled randomly.
    :param use_imbalanced_sampler: If true, use an ImbalancedSampler regardless of `shuffle`.
    :param drop_last: If true, drop incomplete minibatches at the end.
    :param kwargs: Additional arguments that will be passed through to the DataLoader constructor.
    """
    sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
    if use_imbalanced_sampler:
        sampler = ImbalancedSampler(dataset)
    self._actual_batch_sampler = BatchSampler(sampler, batch_size, drop_last)
    repeat_sampler = _RepeatSampler(self._actual_batch_sampler,
                                    batch_size=batch_size,
                                    max_repeats=max_repeats)
    super().__init__(dataset=dataset, batch_sampler=repeat_sampler, **kwargs)
    self.iterator = None
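# _RepeatSampler is not shown in this file. A minimal sketch consistent with
# the call above (the constructor signature and behavior are assumptions, not
# the library's actual class): a batch sampler wrapper that re-yields the
# wrapped sampler so the DataLoader's workers survive across epochs instead of
# being torn down and respawned.
class _RepeatSampler:
    def __init__(self, batch_sampler, batch_size, max_repeats):
        self.batch_sampler = batch_sampler
        self.batch_size = batch_size
        self.max_repeats = max_repeats

    def __iter__(self):
        for _ in range(self.max_repeats):    # replay the epoch's batches
            yield from iter(self.batch_sampler)

    def __len__(self):
        return self.max_repeats * len(self.batch_sampler)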
def predict(self, x1, x2, x3):
    with torch.no_grad():
        self.arch.eval()
        data_size = x1.size(0)
        # Build sequential mini-batches of indices.
        batch_idxs = list(BatchSampler(SequentialSampler(range(data_size)),
                                       batch_size=self.batch_size, drop_last=False))
        losses = []
        outputs1, outputs2, outputs3 = [], [], []
        for x in batch_idxs:
            batch_x1 = x1[x, :]
            batch_x2 = x2[x, :]
            batch_x3 = x3[x, :]
            # Forward pass through the network.
            o1, o2, o3 = self.arch(batch_x1, batch_x2, batch_x3)
            outputs1.append(o1)
            outputs2.append(o2)
            outputs3.append(o3)
            losses.append(self.loss_function(o1, o2, o3).item())
        # Concatenate the per-batch features.
        outputs = [torch.cat(outputs1, dim=0).numpy(),
                   torch.cat(outputs2, dim=0).numpy(),
                   torch.cat(outputs3, dim=0).numpy()]
    return losses, outputs
def _get_outputs(self, x1, x2, x3):
    with torch.no_grad():
        self.arch.eval()
        data_size = x1.size(0)
        batch_idxs = list(BatchSampler(SequentialSampler(range(data_size)),
                                       batch_size=self.batch_size, drop_last=False))
        losses = []
        outputs1, outputs2, outputs3 = [], [], []
        for x in batch_idxs:
            batch_x1 = x1[x, :]
            batch_x2 = x2[x, :]
            batch_x3 = x3[x, :]
            o1, o2, o3 = self.arch(batch_x1, batch_x2, batch_x3)
            outputs1.append(o1)
            outputs2.append(o2)
            outputs3.append(o3)
            losses.append(self.loss_function(o1, o2, o3).item())
        outputs = [torch.cat(outputs1, dim=0).numpy(),
                   torch.cat(outputs2, dim=0).numpy(),
                   torch.cat(outputs3, dim=0).numpy()]
    return losses, outputs
def validate(self, x, tfidf, y):
    '''For validation while training.'''
    with torch.no_grad():
        self.model.eval()
        data_size = x.shape[0]
        batch_idxs = list(BatchSampler(SequentialSampler(range(data_size)),
                                       batch_size=self.batch_size, drop_last=False))
        losses = []
        loss_hidden_list = []
        loss_ae_list = []
        for batch_idx in batch_idxs:
            batch_x1 = x[batch_idx, :].to(self.device)
            batch_tfidf = tfidf[batch_idx].to(self.device)
            batch_y = y[batch_idx].to(self.device)
            x_hidden, y_hidden, y_predicted = self.model(batch_x1, batch_tfidf, batch_y)
            loss_hidden, loss_ae = self.loss(x_hidden, y_hidden, y_predicted, batch_y)
            loss = loss_hidden + self.lamda * loss_ae
            losses.append(loss.item())
            loss_hidden_list.append(loss_hidden.item())
            loss_ae_list.append(loss_ae.item())
    return np.mean(losses), np.mean(loss_hidden_list), np.mean(loss_ae_list)
def mean_importance(model, dataset, loss, batch_size, bar=False):
    '''
    Calculate feature importance by measuring the performance reduction when
    features are imputed with their mean value.

    Args:
      model: PyTorch model. Must be callable, likely inherits from nn.Module.
      dataset: PyTorch dataset, such as data.utils.TabularDataset.
      loss: string descriptor of the loss function ('mse', 'cross entropy').
      batch_size: number of examples to be processed at once.
      bar: whether to display a progress bar.
    '''
    # Setup.
    device = next(model.parameters()).device
    input_size = model.input_size
    loader = DataLoader(dataset,
                        batch_sampler=BatchSampler(SequentialSampler(dataset),
                                                   batch_size=batch_size,
                                                   drop_last=False))
    loss_fn = utils.get_loss_pytorch(loss, reduction='none')
    scores = []

    # Verify that the model outputs are valid.
    utils.verify_pytorch_model(model, next(iter(loader))[0], loss)

    # Performance with all features.
    base_loss = validate_pytorch(
        model, loader, utils.get_loss_pytorch(loss, reduction='mean')).item()

    # For imputing with the mean.
    imputation = utils.ReferenceImputation(
        torch.mean(torch.tensor(dataset.data), dim=0))

    if bar:
        bar = tqdm(total=len(dataset) * input_size)

    with torch.no_grad():
        for ind in range(input_size):
            # Running average of the loss with feature `ind` imputed.
            score = 0
            N = 0
            for x, y in loader:
                # Move to device.
                n = len(x)
                x = x.to(device=device)
                y = y.to(device=device)

                # Impute with the mean and make predictions.
                y_hat = model(imputation.impute_ind(x, ind))

                # Measure the loss and update the running average
                # (avoid shadowing the `loss` argument).
                batch_loss = torch.mean(loss_fn(y_hat, y))
                score = (score * N + batch_loss * n) / (N + n)
                N += n
                if bar:
                    bar.update(n)
            scores.append(score)

    return (torch.stack(scores) - base_loss).cpu().data.numpy()
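# Hypothetical usage of mean_importance (the model/dataset names and values
# below are assumptions, shown commented out because they depend on the
# project's own utils): scores[i] is the increase in loss when feature i is
# replaced by its mean, so larger values mean more important features.
#
# scores = mean_importance(model, val_dataset, loss='cross entropy',
#                          batch_size=256, bar=True)
# ranking = scores.argsort()[::-1]  # features ordered most to least important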
def __init__(self, vf_dataset, batch_size=8, shuffle=False, drop_last=False, *args, **kwargs):
    self.vf_dataset = vf_dataset
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.workers = vf_dataset.workers

    # One batch sampler per worker, all over the same index range.
    self.batch_samplers = {}
    for worker in self.workers:
        data_range = range(len(list(self.vf_dataset.datasets.values())))
        if shuffle:
            sampler = RandomSampler(data_range)
        else:
            sampler = SequentialSampler(data_range)
        batch_sampler = BatchSampler(sampler, self.batch_size, drop_last)
        self.batch_samplers[worker] = batch_sampler

    # One single-partition loader per worker's dataset.
    single_loaders = []
    for k in vf_dataset.datasets.keys():
        single_loaders.append(
            SinglePartitionDataLoader(vf_dataset.datasets[k],
                                      batch_sampler=self.batch_samplers[k]))
    self.single_loaders = single_loaders
def generate_batch(self, episodes, mode):
    total_steps = len(episodes)  # how many samples will be generated in total
    print("Total Steps: {}".format(total_steps))
    # Episode sampler: in test/val mode emit everything in one batch,
    # otherwise batchify with 16 episodes per batch.
    if mode == "test" or mode == "val":
        BS = len(episodes)
    else:
        BS = 16
    sampler = BatchSampler(
        RandomSampler(range(len(episodes)), replacement=False),
        BS,
        drop_last=False,
    )
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        ts_number = torch.LongTensor(indices)
        subjects = []
        for episode in episodes_batch:
            # Get all samples from this episode.
            subjects.append(episode)
        yield torch.stack(subjects).to(self.device), ts_number.to(self.device)
def _compute_per_element_grads(self, theta_init):
    self.model.load_state_dict(theta_init)
    # Sequential batches of training indices.
    batch_wise_indices = list(BatchSampler(SequentialSampler(np.arange(self.N_trn)),
                                           self.batch_size, drop_last=False))
    cnt = 0
    for batch_idx in batch_wise_indices:
        inputs = torch.cat(
            [self.trainset[x][0].view(-1, 1, self.trainset[x][0].shape[1],
                                      self.trainset[x][0].shape[2])
             for x in batch_idx],
            dim=0).type(torch.float)
        targets = torch.tensor([self.trainset[x][1] for x in batch_idx])
        inputs, targets = inputs.to(self.device), targets.to(self.device, non_blocking=True)
        with torch.no_grad():
            # Softmax predictions for this batch.
            probs = F.softmax(self.model(inputs), dim=1)
            # One-hot encoded targets.
            tmp_tensor = torch.zeros(len(inputs), self.num_classes).to(self.device)
            tmp_tensor.scatter_(1, targets.view(-1, 1), 1)
        if cnt == 0:
            data = probs
            outputs = tmp_tensor
        else:
            data = torch.cat((data, probs), dim=0)
            outputs = torch.cat((outputs, tmp_tensor), dim=0)
        cnt += 1
    # For cross-entropy with a softmax head, the gradient with respect to the
    # logits is (softmax - one_hot), computed here per element.
    grads_vec = data - outputs
    torch.cuda.empty_cache()
    print("Per-element gradient computation is completed")
    self.grads_per_elem = grads_vec
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler: sample `total_steps` episode indices with replacement,
    # then batchify them with `self.batch_size` episodes per batch.
    sampler = BatchSampler(RandomSampler(range(len(episodes)), replacement=True,
                                         num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev = [], []
        for episode in episodes_batch:
            # Get one sample from this episode. Note: when t == 0,
            # episode[t - 1] wraps to the last frame via negative indexing.
            t = np.random.randint(0, len(episode))
            x_t.append(episode[t])
            x_tprev.append(episode[t - 1])
        yield (torch.stack(x_t).float().to(self.device) / 255.,
               torch.stack(x_tprev).float().to(self.device) / 255.)
def __init__(self, data_source, batch_size, num_replicas=None, rank=None):
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()
    self.data_source = data_source
    self.batch_size = batch_size
    self.num_replicas = num_replicas
    assert len(self.data_source) % self.batch_size == 0
    # Pre-build the sequential batches, then split them across replicas.
    self.batch_sampler = list(
        BatchSampler(SequentialSampler(range(len(self.data_source))),
                     batch_size=self.batch_size, drop_last=True))
    self.num_samples = int(math.floor(len(self.batch_sampler) * 1.0 / self.num_replicas))
    self.total_size = self.num_samples * self.num_replicas
    # Random permutation of batch ids, shared by all replicas.
    self.random_id_sampler = torch.randperm(self.total_size).tolist()
    self.rank = rank
    self.epoch = 0
def generate_batch(self, episodes):
    total_steps = sum([len(e) for e in episodes])
    print('Total Steps: {}'.format(total_steps))
    # Episode sampler: sample `total_steps` episode indices with replacement,
    # then batchify them with `self.batch_size` episodes per batch.
    sampler = BatchSampler(RandomSampler(range(len(episodes)), replacement=True,
                                         num_samples=total_steps),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        episodes_batch = [episodes[x] for x in indices]
        x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
        for episode in episodes_batch:
            # Get one sample from this episode.
            t, t_hat = np.random.randint(0, len(episode)), np.random.randint(0, len(episode))
            x_t.append(episode[t])
            # Apply the same transform to x_{t-1} and x_{t_hat}:
            # https://github.com/pytorch/vision/issues/9#issuecomment-383110707
            # Use numpy's random seed because Cutout uses np
            # seed = random.randint(0, 2 ** 32)
            # np.random.seed(seed)
            x_tprev.append(episode[t - 1])
            # np.random.seed(seed)
            x_that.append(episode[t_hat])
            ts.append([t])
            thats.append([t_hat])
        yield (torch.stack(x_t).float().to(self.device) / 255.,
               torch.stack(x_tprev).float().to(self.device) / 255.,
               torch.stack(x_that).float().to(self.device) / 255.,
               torch.Tensor(ts).to(self.device),
               torch.Tensor(thats).to(self.device))
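# A runnable sketch of the sampling pattern the generate_batch variants above
# share (the sizes here are illustrative): RandomSampler with replacement
# draws `num_samples` indices, BatchSampler groups them, and drop_last=True
# discards any final partial batch.
import torch
from torch.utils.data import BatchSampler, RandomSampler

num_episodes, total_steps, batch_size = 5, 12, 4
sampler = BatchSampler(RandomSampler(range(num_episodes), replacement=True,
                                     num_samples=total_steps),
                       batch_size, drop_last=True)
print(list(sampler))  # 3 lists of 4 episode indices; duplicates allowed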
def fit(self, w, xu, nb_iter=100, batch_size=None, lr=1e-3,
        method='adam', verbose=False, **kwargs):
    l2 = kwargs.get('l2', 0.)
    if method == 'adam':
        self.optim = Adam(self.parameters(), lr=lr, weight_decay=l2)
    else:
        momentum = kwargs.get('momentum', 0.)
        self.optim = SGD(self.parameters(), lr=lr, weight_decay=l2, momentum=momentum)

    set_size = xu.shape[0]
    batch_size = set_size if batch_size is None else batch_size
    # Note: the batches are drawn once and reused for every iteration.
    batches = list(BatchSampler(SubsetRandomSampler(range(set_size)), batch_size, True))

    for n in range(nb_iter):
        for batch in batches:
            self.optim.zero_grad()
            loss = -self.elbo(w[batch], xu[batch], batch_size, set_size)
            loss.backward()
            self.optim.step()

        if verbose and n % 100 == 0:
            print('Epoch: {}/{}.............'.format(n, nb_iter), end=' ')
            print("Loss: {:.4f}".format(loss))