Example #1
 def create_samplers(self, hps):
     if not dist.is_available():
         self.train_sampler = BatchSampler(RandomSampler(self.train_dataset), batch_size=hps.bs, drop_last=True)
         self.test_sampler = BatchSampler(RandomSampler(self.test_dataset), batch_size=hps.bs, drop_last=True)
     else:
         self.train_sampler = DistributedSampler(self.train_dataset)
         self.test_sampler = DistributedSampler(self.test_dataset)
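The snippet above only constructs the samplers. A minimal, self-contained sketch of how the two kinds of sampler are wired into DataLoaders differently (toy dataset and batch size are assumptions, and the sketch additionally guards on dist.is_initialized() so it runs outside a distributed launch):

# Sketch only: a BatchSampler already yields whole index lists and goes through
# batch_sampler=, while a DistributedSampler yields single indices and leaves
# batching to the DataLoader itself.
import torch
import torch.distributed as dist
from torch.utils.data import (BatchSampler, DataLoader, DistributedSampler,
                              RandomSampler, TensorDataset)

dataset = TensorDataset(torch.randn(128, 4))
if not (dist.is_available() and dist.is_initialized()):
    batch_sampler = BatchSampler(RandomSampler(dataset), batch_size=16, drop_last=True)
    loader = DataLoader(dataset, batch_sampler=batch_sampler)
else:
    sampler = DistributedSampler(dataset)
    loader = DataLoader(dataset, sampler=sampler, batch_size=16, drop_last=True)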
Example #2
    def get_train_val_loader(self):
        '''
        Raise an exception if not in training mode.

        :return mnist_cl_loader_train: mini-batch loader for training set
        :return mnist_cl_loader_val: full-batch loader for validation set (one batch with all validation set)
        '''

        if self.train:
            train_indices, val_indices = train_test_split(
                list(range(self.targets.size(0))),
                test_size=self.perc_val,
                shuffle=True,
                stratify=self.targets.numpy())
            train_sampler = BatchSampler(SubsetRandomSampler(train_indices),
                                         batch_size=self.batch_size,
                                         drop_last=True)
            val_sampler = BatchSampler(SubsetRandomSampler(val_indices),
                                       batch_size=len(val_indices),
                                       drop_last=False)

            mnist_cl_loader_train = DataLoader(self,
                                               batch_sampler=train_sampler)
            mnist_cl_loader_val = DataLoader(self, batch_sampler=val_sampler)

            return mnist_cl_loader_train, mnist_cl_loader_val
        else:
            raise Exception(
                "Cannot split train and validation when mode test is on. Split is allowed only in train mode."
            )
Example #3
 def test_dataset_loader_length(self, mnist_dataset):
     train_indices, valid_indices = mnist_dataset.get_train_and_validation_set_indices(train_valid_split_ratio=0.8, seed=2)
     train_loader = DatasetLoader(mnist_dataset, batch_sampler=BatchSampler(sampler=SubsetRandomSampler(train_indices),
                                                                      batch_size=50, drop_last=False))
     valid_loader = DatasetLoader(mnist_dataset, batch_sampler=BatchSampler(sampler=SubsetRandomSampler(valid_indices),
                                                                      batch_size=50, drop_last=False))
     assert (len(train_loader) == 56000) and (len(valid_loader) == 14000)
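Note that DatasetLoader above is a project-specific class; with torch's own DataLoader, len() counts the index batches produced by the BatchSampler rather than samples. A small sketch (the 70000/56000/50 figures are chosen only to mirror the MNIST split in the test):

# With a batch_sampler, len(DataLoader) == len(BatchSampler), i.e. the number of batches.
import torch
from torch.utils.data import BatchSampler, DataLoader, SubsetRandomSampler, TensorDataset

dataset = TensorDataset(torch.arange(70000))
train_indices = list(range(56000))
train_loader = DataLoader(dataset,
                          batch_sampler=BatchSampler(SubsetRandomSampler(train_indices),
                                                     batch_size=50, drop_last=False))
assert len(train_loader) == 1120  # 56000 samples / 50 per batch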
Example #4
def create_dataloaders(dataset: Dataset, indexes: dict, batch_size):
    train_idx = indexes.get('train', None)
    val_idx = indexes.get('val', None)
    test_idx = indexes.get('test', None)
    dataloaders = {}
    if train_idx:
        # Build each loader on its Subset so the sampler's indices (0..len(subset)-1)
        # match the data they select, and pass the BatchSampler via batch_sampler=
        # so each yielded index list becomes one mini-batch.
        train_set = Subset(dataset, train_idx)
        train_sampler = BatchSampler(SequentialSampler(train_set),
                                     batch_size=batch_size,
                                     drop_last=False)
        dataloaders['train'] = DataLoader(train_set,
                                          batch_sampler=train_sampler,
                                          num_workers=10,
                                          pin_memory=True)
    if val_idx:
        val_set = Subset(dataset, val_idx)
        val_sampler = BatchSampler(SequentialSampler(val_set),
                                   batch_size=batch_size,
                                   drop_last=False)
        dataloaders['val'] = DataLoader(val_set,
                                        batch_sampler=val_sampler,
                                        num_workers=10,
                                        pin_memory=True)
    if test_idx:
        test_set = Subset(dataset, test_idx)
        test_sampler = BatchSampler(SequentialSampler(test_set),
                                    batch_size=batch_size,
                                    drop_last=False)
        dataloaders['test'] = DataLoader(test_set,
                                         batch_sampler=test_sampler,
                                         num_workers=10,
                                         pin_memory=True)
    return dataloaders
Example #5
def data_loaders(batch_size, shuffle=True, ratios=[0.6, 0.2, 0.2]):
    X, y = get_mitbih()
    X_train, X_testvalid, y_train, y_testvalid = train_test_split(
        X, y, train_size=ratios[0], shuffle=True, stratify=y)
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_testvalid, y_testvalid,
        train_size=ratios[1] / (ratios[1] + ratios[2]))
    ds_train = MITBIHDataset(X_train, y_train)
    ds_valid = MITBIHDataset(X_valid, y_valid)
    ds_test = MITBIHDataset(X_test, y_test)

    sampler_train = BatchSampler(RandomSampler(ds_train),
                                 batch_size=batch_size,
                                 drop_last=False)
    sampler_valid = BatchSampler(RandomSampler(ds_valid),
                                 batch_size=batch_size,
                                 drop_last=False)
    sampler_test = BatchSampler(RandomSampler(ds_test),
                                batch_size=batch_size,
                                drop_last=False)

    # Pass the batch samplers via batch_sampler= (matching dl_test below) so that
    # `collate` receives a list of individual samples per mini-batch.
    dl_train = DataLoader(ds_train, batch_sampler=sampler_train, collate_fn=collate)
    dl_valid = DataLoader(ds_valid, batch_sampler=sampler_valid, collate_fn=collate)
    dl_test = DataLoader(ds_test,
                         batch_sampler=sampler_test,
                         collate_fn=collate)

    return (dl_train, dl_valid, dl_test), ds_train
Example #6
    def run_on_subset(self, subset: Dataset) -> Tuple[DTLoss, DTLoss]:
        """train on subset, validate, and report."""
        trn_sampler = BatchSampler(RandomSampler(subset), self.batch_size,
                                   False)
        val_sampler = BatchSampler(RandomSampler(self.val_set),
                                   self.batch_size, False)

        ### train
        self.model.train()
        trn_loss = DTLoss()
        for minibatch in trn_sampler:
            minibatch_loss = self._minibatch_loss(minibatch)

            self._optim.zero_grad()
            minibatch_loss.backward(self._loss_coefs)
            self._optim.step()

            trn_loss += minibatch_loss
            self.n_iters += len(minibatch)

        ### validate
        self.model.eval()
        val_loss = DTLoss()
        with torch.no_grad():
            for minibatch in val_sampler:
                minibatch_loss = self._minibatch_loss(minibatch)

                val_loss += minibatch_loss

        return trn_loss, val_loss
Example #7
 def return_dataloader(self, dataset, batch_size, shuffle = False, num_workers=0):
     from torch.utils.data import BatchSampler, DataLoader, SequentialSampler, RandomSampler
     
     def collate(batch):
         return Batch.from_data_list(batch[0])
     
     if shuffle:
         sampler = BatchSampler(RandomSampler(dataset),batch_size=batch_size,drop_last=False)
     else:
         sampler = BatchSampler(SequentialSampler(dataset),batch_size=batch_size,drop_last=False)
     
     return DataLoader(dataset = dataset, collate_fn = collate, num_workers = num_workers, pin_memory = True, sampler = sampler)
Example #8
 def return_dataloader(self, batch_size, shuffle = False):
     from torch.utils.data import BatchSampler, DataLoader, SequentialSampler, RandomSampler
     
     def collate(batch):
         return Batch.from_data_list(batch[0])
     
     if shuffle:
         sampler = BatchSampler(RandomSampler(self.train()),batch_size=batch_size,drop_last=False)
     else:
         sampler = BatchSampler(SequentialSampler(self.test(extra_targets = '')),batch_size=batch_size,drop_last=False)
     
     return DataLoader(dataset = self, collate_fn = collate, sampler = sampler)
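Both variants above pass the BatchSampler through sampler= and then unwrap batch[0] in the collate function: the DataLoader indexes the dataset with a whole list of indices at once, which works for datasets that support list indexing (such as PyTorch Geometric datasets). A self-contained sketch of that pattern, with a toy tensor-backed dataset standing in for the graph data:

# Sketch of the "BatchSampler passed as sampler=" trick used in both functions above.
import torch
from torch.utils.data import BatchSampler, DataLoader, Dataset, RandomSampler

class ListIndexableDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # idx may be a single index or a whole list of indices.
        return self.data[idx]

def collate(batch):
    # With sampler=BatchSampler(...) and the default batch_size=1, `batch` is a
    # one-element list holding the already-batched slice -> just unwrap it.
    return batch[0]

dataset = ListIndexableDataset(torch.randn(100, 8))
sampler = BatchSampler(RandomSampler(dataset), batch_size=10, drop_last=False)
loader = DataLoader(dataset, sampler=sampler, collate_fn=collate)
for batch in loader:
    assert batch.shape == (10, 8)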
Example #9
    def __init__(self,
                 model,
                 optimizer,
                 expert_buffer,
                 algo_params,
                 aux_tasks=AuxiliaryTask()):
        """ Basic behavioral cloning algorithm that minimizes the negative log likelihood
        """

        self._optimizer = optimizer
        self.model = model
        self.expert_buffer = expert_buffer
        self.algo_params = algo_params
        self.step = 0

        self.device = algo_params.get(c.DEVICE, torch.device(c.CPU))
        self._opt_epochs = algo_params.get(c.OPT_EPOCHS,
                                           c.DEFAULT_BC_PARAMS[c.OPT_EPOCHS])
        self._opt_batch_size = algo_params.get(
            c.OPT_BATCH_SIZE, c.DEFAULT_BC_PARAMS[c.OPT_BATCH_SIZE])
        self._accum_num_grad = algo_params.get(
            c.ACCUM_NUM_GRAD, c.DEFAULT_BC_PARAMS[c.ACCUM_NUM_GRAD])
        self._overfit_tolerance = algo_params.get(
            c.OVERFIT_TOLERANCE, c.DEFAULT_BC_PARAMS[c.OVERFIT_TOLERANCE])
        self._aux_tasks = aux_tasks

        assert self._opt_batch_size % self._accum_num_grad == 0
        self._num_samples_per_accum = self._opt_batch_size // self._accum_num_grad

        self._max_grad_norm = algo_params.get(
            c.MAX_GRAD_NORM, c.DEFAULT_BC_PARAMS[c.MAX_GRAD_NORM])

        self.train_preprocessing = algo_params[c.TRAIN_PREPROCESSING]

        self._train_val_ratio = algo_params.get(
            c.VALIDATION_RATIO, c.DEFAULT_BC_PARAMS[c.VALIDATION_RATIO])
        self.num_val = int(len(self.expert_buffer) * self._train_val_ratio)
        self.num_train = len(self.expert_buffer) - self.num_val
        idxes = np.random.permutation(np.arange(len(self.expert_buffer)))
        self._val_sampler = BatchSampler(sampler=SubsetRandomSampler(
            idxes[self.num_train:]),
                                         batch_size=self._opt_batch_size,
                                         drop_last=False)
        self._train_sampler = BatchSampler(sampler=SubsetRandomSampler(
            idxes[:self.num_train]),
                                           batch_size=self._opt_batch_size,
                                           drop_last=False)

        self.best_validation_loss = np.inf
        self._overfit_count = 0
        self.overfitted = False
        self._curr_best_model = copy.deepcopy(self.model.state_dict())
Example #10
 def _init_train_loader(self, batch_size=0) -> DataLoader:
     """
     创建训练用的DataLoader
     :param batch_size:
     :return:
     """
     random.shuffle(self.sample_set)
     sampler = SubsetRandomSampler(self.sample_set)
     if batch_size:
         sampler = BatchSampler(sampler, batch_size, drop_last=False)
     else:
         sampler = BatchSampler(sampler, len(sampler), drop_last=False)
     return DataLoader(self.train_set, batch_sampler=sampler, num_workers=2)
Example #11
def gen_data(t, x, n_t, n_col, col_bs, init_bs, bc_bs, device='cpu'):
    """
    Generates the dataloaders for the trainer.

    :param t: time domain limits ([t0, tf]).
    :param x: spatial coordinates.
    :param n_t: number of time domain points.
    :param n_col: number of collocation points.
    :param col_bs: collocation points batch size.
    :param init_bs: initial conditions batch size.
    :param bc_bs: boundary conditions batch size.
    :param device: device on which the tensors are created.
    :return: dataloaders: (collocation points, initial conditions, boundary conditions).
    """
    idx_init = torch.arange(x.numel(), device=device).view(-1, 1)
    ds_init = BatchSampler(RandomSampler(range(x.numel()), replacement=False),
                           batch_size=init_bs,
                           drop_last=True)
    dl_init = DataLoader(PINNDataset(x.view(-1, 1), idx_init, device=device),
                         sampler=ds_init,
                         collate_fn=collate_pinn)

    t_bc = torch.linspace(t[0], t[-1], n_t, device=device).view(-1, 1)
    xlb = x[0] * torch.ones((n_t, 1), device=device, requires_grad=True)
    xub = x[-1] * torch.ones((n_t, 1), device=device, requires_grad=True)
    ds_bc = BatchSampler(RandomSampler(range(t_bc.numel()), replacement=False),
                         batch_size=bc_bs,
                         drop_last=True)
    dl_bc = DataLoader(PINNDataset(t_bc,
                                   xlb,
                                   xub,
                                   device=device,
                                   gradient=True),
                       sampler=ds_bc,
                       collate_fn=collate_pinn)

    t_col = torch.linspace(t[0], t[-1], n_col[0], device=device)
    x_col = torch.linspace(x[0], x[-1], n_col[1], device=device)
    t_col, x_col = torch.meshgrid([t_col, x_col])
    t_col = t_col.flatten().view(-1, 1)
    x_col = x_col.flatten().view(-1, 1)
    ds_col = BatchSampler(RandomSampler(range(x_col.numel()),
                                        replacement=False),
                          batch_size=col_bs,
                          drop_last=True)
    dl_col = DataLoader(PINNDataset(t_col, x_col, device=device,
                                    gradient=True),
                        sampler=ds_col,
                        collate_fn=collate_pinn)

    return dl_col, dl_init, dl_bc
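PINNDataset and collate_pinn are not shown above. Because the BatchSamplers are again passed through sampler=, a plausible sketch (an assumption, not the original implementation) is a dataset that slices all of its tensors with the incoming index list and a collate that unwraps the single pre-batched element:

# Hedged sketch of what PINNDataset / collate_pinn might look like; the real
# implementations in the original project may differ.
import torch
from torch.utils.data import Dataset

class PINNDataset(Dataset):
    def __init__(self, *tensors, device='cpu', gradient=False):
        # Move every tensor to the requested device; re-enable autograd tracking
        # where gradients w.r.t. the inputs are needed (boundary/collocation points).
        self.tensors = tuple(
            t.detach().to(device).requires_grad_(gradient and t.is_floating_point())
            for t in tensors)

    def __len__(self):
        return self.tensors[0].shape[0]

    def __getitem__(self, idx):
        # idx is the whole list of row indices produced by the BatchSampler.
        return tuple(t[idx] for t in self.tensors)

def collate_pinn(batch):
    # DataLoader hands over a one-element list containing the pre-batched tuple.
    return batch[0]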
Example #12
def prepare_dataloader( config, data_config, train_ds, valid_ds = None ):
  """Construct torch DataLoader(s) based on configuration specified when config.py and data_config.py was last ran."""
  if data_config.include_valid_set:
    # DataLoader for latent vectors for validation:
    z_valid = gen_rand_latent_vars(
                num_samples = len( valid_ds ),
                length = config.len_latent,
                distribution = config.latent_distribution,
                device = 'cpu'
              )
    if config.class_condition or config.use_auxiliary_classifier:
        z_labels = torch.randint( 0, config.num_classes, ( len( z_valid ), 1, ),
                                  dtype = torch.int64, device = 'cpu' )
    if config.class_condition:
        z_valid = concat_rand_classes_to_z( z = z_valid, num_classes = config.num_classes,
                                            z_labels = z_labels, device = 'cpu' )
    if config.class_condition or config.use_auxiliary_classifier:
        z_labels.squeeze_()
        z_valid_ds = TensorDataset( z_valid, z_labels )
    else:
        z_valid_ds = TensorDataset( z_valid )

  # DataLoader(s) for training data:
  dataloader = configure_dataloader_for_hardware( num_workers = config.num_workers, pin_memory = config.pin_memory )

  valid_dl = None; z_valid_dl = None
  if config.model == 'ResNet GAN':
    train_dl = dataloader( dataset = train_ds, batch_size = config.batch_size,
                           shuffle = True, drop_last = True )
    if data_config.include_valid_set:
      valid_dl = dataloader( dataset = valid_ds, batch_size = config.batch_size,
                             shuffle = False, drop_last = False )
      z_valid_dl = dataloader( dataset = z_valid_ds, batch_size = config.batch_size,
                               shuffle = False, drop_last = False )
  elif config.model in ( 'ProGAN', 'StyleGAN', ):
    train_batch_sampler = BatchSampler( sampler = RandomSampler( data_source = train_ds ),
                                        batch_size = config.bs_dict[ config.init_res ], drop_last = True )
    train_dl = dataloader( dataset = train_ds, batch_sampler = train_batch_sampler )
    if data_config.include_valid_set:
      valid_batch_sampler = BatchSampler( sampler = SequentialSampler( data_source = valid_ds ),
                                          batch_size = config.bs_dict[ config.init_res ], drop_last = False )
      valid_dl = dataloader( dataset = valid_ds, batch_sampler = valid_batch_sampler )
      z_valid_batch_sampler = BatchSampler( sampler = SequentialSampler( data_source = z_valid_ds ),
                                            batch_size = config.bs_dict[ config.init_res ], drop_last = False )
      z_valid_dl = dataloader( dataset = z_valid_ds, batch_sampler = z_valid_batch_sampler )
  else:
    message = f'Model type {config.model} not supported, thus could not construct torch DataLoader.'
    raise ValueError( message )

  return train_dl, valid_dl, z_valid_dl
Example #13
    def init_loaders(self):
        # maybe lazy load for predicting only runs
        for name in self.config.datasets:
            dataset_config = AttrDefault(lambda: None,
                                         self.config.datasets[name])
            if self.config[
                    'predict_only_mode'] and not dataset_config.predicting:
                continue
            # ds = self.run.get_command_function(dataset_config.dataset)()
            ds = self.dataset_manager.get_dataset(dataset_config)

            self.datasets[name] = ds
            shared_globals.logger.info("Initialized Dataset  `" + name +
                                       "` with {} Samples ".format(len(ds)))
            if dataset_config.batch_config.get(
                    "batch_sampler") == "stratified":
                shared_globals.logger.info(
                    "Initializing  StratifiedBatchSampler for " + name)
                batch_sampler = StratifiedBatchSampler(
                    ds, dataset_config.batch_config.batch_size,
                    self.config.epochs)
            elif dataset_config.batch_config.get(
                    "batch_sampler") == "sequential":
                shared_globals.logger.info(
                    "Initializing Sequential Sampler for " + name)
                sampler = SequentialSampler(ds)
                batch_sampler = BatchSampler(
                    sampler, dataset_config.batch_config.batch_size, False)
            else:
                if dataset_config.testing or dataset_config.predicting:
                    shared_globals.logger.info(
                        "Initializing Sequential Sampler for " + name)
                    sampler = SequentialSampler(ds)
                else:
                    shared_globals.logger.info(
                        "Initializing RandomSampler for " + name)
                    sampler = RandomSampler(ds)
                batch_sampler = BatchSampler(
                    sampler, dataset_config.batch_config.batch_size, True)
            loader = torch.utils.data.DataLoader(
                ds,
                # batch_size=batch_size,
                batch_sampler=batch_sampler,
                # shuffle=True,
                num_workers=dataset_config.num_of_workers,
                pin_memory=True,
                # drop_last=True,
                worker_init_fn=worker_init_fn,
                timeout=60)
            self.data_loaders[name] = loader
Example #14
 def generate_epoch_episodes_in_batches(self):
     '''
     Generates batches of frames in randomized order by sampling frames from multiple episodes
     returns: 5-dimensional tensor (n, b, c, h, w)  where n is number of batches, b is batch size, 
     c is number of channels of frame and h and w are height and width of frame
     '''
     # [[torch tensors (3, h, w)]]
     episodes = get_episodes(self.opt.game, self.opt.epoch_steps)
     total_steps = sum([len(e) for e in episodes])
     print('Total Steps: {}'.format(total_steps))
     # Episode sampler
     # Sample `num_samples` frames then batchify them with `self.batch_size` frames per batch
     sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                          replacement=True,
                                          num_samples=total_steps),
                            self.opt.batch_size,
                            drop_last=True)
     all_batches = []
     for indices in sampler:
         episodes_batch = [episodes[x] for x in indices]
         x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
         for episode in episodes_batch:
             # Get one sample from this episode
             t = np.random.randint(0, len(episode))
             t_hat = np.random.randint(0, len(episode))
             frame = episode[t]
             x_t.append(frame)
         x_batch = torch.stack(x_t).float()
         all_batches.append(x_batch)
     return torch.stack(all_batches)
Example #15
    def create_sampler(self, dataset: data.Dataset, batch_sampler: bool,
                       profile: Profile, shared: Storage, logger: Logger):
        assert batch_sampler

        return BatchSampler(RandomSampler(dataset),
                            batch_size=profile.batch_size,
                            drop_last=profile.drop_last)
Example #16
    def __init__(self, base_dset, batch_size):
        super(CoherencyPairBatchWrapper, self).__init__()
        assert isinstance(base_dset, CoherencyPairDataSet)
        self.base = base_dset
        self.batch_size = batch_size

        self.sampler = list(BatchSampler(SequentialSampler(self.base), batch_size=self.batch_size, drop_last=False))
Example #17
def get_problem(rank, world_size, batch_size, device, model_name: str):
    # Select the desired model on the fly
    logging.info(f"Using {model_name} for benchmarking")

    try:
        model = getattr(importlib.import_module("torchvision.models"), model_name)(pretrained=False).to(device)
    except AttributeError:
        model = getattr(importlib.import_module("timm.models"), model_name)(pretrained=False).to(device)

    # Data setup, duplicate the grey channels to get pseudo color
    def collate(inputs: List[Any]):
        return {
            "inputs": torch.stack([i[0] for i in inputs]).repeat(1, 3, 1, 1).to(device),
            "label": torch.tensor([i[1] for i in inputs]).to(device),
        }

    # Transforms
    transforms = []
    if model_name.startswith("vit"):
        # ViT models expect a fixed input size, so add an ad hoc transform to resize the pictures accordingly
        pic_size = int(model_name.split("_")[-1])
        transforms.append(Resize(pic_size))

    transforms.append(ToTensor())

    dataset = MNIST(transform=Compose(transforms), download=False, root=TEMPDIR)
    sampler: Sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
    batch_sampler = BatchSampler(sampler, batch_size, drop_last=True)
    dataloader = DataLoader(dataset=dataset, batch_sampler=batch_sampler, collate_fn=collate)

    loss_fn = nn.CrossEntropyLoss()
    return model, dataloader, loss_fn
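One detail the benchmark setup above leaves implicit is that a DistributedSampler should be re-seeded once per epoch. A hedged sketch of consuming get_problem()'s loader (rank, world size, model name and epoch count are placeholders, and the loop omits the backward pass):

# Sketch only: reshuffle the DistributedSampler every epoch via set_epoch().
model, dataloader, loss_fn = get_problem(rank=0, world_size=1, batch_size=32,
                                         device="cpu", model_name="resnet18")
for epoch in range(2):
    # The DistributedSampler sits inside the BatchSampler attached to the loader.
    dataloader.batch_sampler.sampler.set_epoch(epoch)
    for batch in dataloader:
        loss = loss_fn(model(batch["inputs"]), batch["label"])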
Example #18
    def fit(self, train_x1, train_x2, train_x3, test_x1, test_x2, test_x3):
        data_size = train_x1.size(0)
        train_losses = []
        for epoch in range(self.epoch_num):
            self.arch.train()
            batch_idxs = list(
                BatchSampler(RandomSampler(range(data_size)),
                             batch_size=self.batch_size,
                             drop_last=False))
            for x in batch_idxs:
                self.optimizer.zero_grad()
                batch_x1 = train_x1[x, :]
                batch_x2 = train_x2[x, :]
                batch_x3 = train_x3[x, :]
                o1, o2, o3 = self.arch(batch_x1, batch_x2, batch_x3)
                loss = self.loss_function(o1, o2, o3)
                train_losses.append(loss.data)
                loss.backward()
                self.optimizer.step()

            # training the gcca model
            _, outputs = self._get_outputs(train_x1, train_x2, train_x3)
            GCCA_obj = GCCA(self.out_size, method="gcca")
            GCCA_obj.fit(outputs[0], outputs[1], outputs[2],
                         params=None)  #function from cca-zoo

            loss = self.test(test_x1, test_x2, test_x3)
        print("Fitted Model to Data")
Example #19
 def __init__(self,
              dataset: Any,
              max_repeats: int,
              batch_size: int = 1,
              shuffle: bool = False,
              use_imbalanced_sampler: bool = False,
              drop_last: bool = False,
              **kwargs: Any):
     """
     Creates a new data loader.
     :param dataset: The dataset that should be loaded.
     :param batch_size: The number of samples per minibatch.
     :param shuffle: If true, the dataset will be shuffled randomly.
     :param drop_last: If true, drop incomplete minibatches at the end.
     :param kwargs: Additional arguments that will be passed through to the Dataloader constructor.
     """
     sampler = RandomSampler(dataset) if shuffle else SequentialSampler(
         dataset)
     if use_imbalanced_sampler:
         sampler = ImbalancedSampler(dataset)
     self._actual_batch_sampler = BatchSampler(sampler, batch_size,
                                               drop_last)
     repeat_sampler = _RepeatSampler(self._actual_batch_sampler,
                                     batch_size=batch_size,
                                     max_repeats=max_repeats)
     super().__init__(dataset=dataset,
                      batch_sampler=repeat_sampler,
                      **kwargs)
     self.iterator = None
Example #20
 def predict(self, x1, x2, x3):
     # inference mode: no gradient tracking
     with torch.no_grad():
         self.arch.eval()
         data_size = x1.size(0)
         #making mini batches
         batch_idxs = list(
             BatchSampler(SequentialSampler(range(data_size)),
                          batch_size=self.batch_size,
                          drop_last=False))
         losses = []
         outputs1 = []
         outputs2 = []
         outputs3 = []
         for x in batch_idxs:
             batch_x1 = x1[x, :]
             batch_x2 = x2[x, :]
             batch_x3 = x3[x, :]
             #forward feeding to network
             o1, o2, o3 = self.arch(batch_x1, batch_x2, batch_x3)
             outputs1.append(o1)
             outputs2.append(o2)
             outputs3.append(o3)
             loss = self.loss_function(o1, o2, o3)
             losses.append(loss.item())
             #new features
     outputs = [
         torch.cat(outputs1, dim=0).numpy(),
         torch.cat(outputs2, dim=0).numpy(),
         torch.cat(outputs3, dim=0).numpy()
     ]
     return losses, outputs
Example #21
 def _get_outputs(self, x1, x2, x3):
     with torch.no_grad():
         self.arch.eval()
         data_size = x1.size(0)
         batch_idxs = list(
             BatchSampler(SequentialSampler(range(data_size)),
                          batch_size=self.batch_size,
                          drop_last=False))
         losses = []
         outputs1 = []
         outputs2 = []
         outputs3 = []
         for x in batch_idxs:
             batch_x1 = x1[x, :]
             batch_x2 = x2[x, :]
             batch_x3 = x3[x, :]
             o1, o2, o3 = self.arch(batch_x1, batch_x2, batch_x3)
             outputs1.append(o1)
             outputs2.append(o2)
             outputs3.append(o3)
             loss = self.loss_function(o1, o2, o3)
             losses.append(loss.item())
     outputs = [
         torch.cat(outputs1, dim=0).numpy(),
         torch.cat(outputs2, dim=0).numpy(),
         torch.cat(outputs3, dim=0).numpy()
     ]
     return losses, outputs
Example #22
 def validate(self, x, tfidf, y):
     '''
     For validation while training
     '''
     with torch.no_grad():
         self.model.eval()
         data_size = x.shape[0]
         batch_idxs = list(BatchSampler(SequentialSampler(
             range(data_size)), batch_size=self.batch_size, drop_last=False))
         losses = []
         loss_hidden_list = []
         loss_ae_list = []
         for batch_idx in batch_idxs:
             batch_x1 = x[batch_idx, :].to(self.device)
             batch_tfidf = tfidf[batch_idx].to(self.device)
             batch_y = y[batch_idx].to(self.device)
             x_hidden, y_hidden, y_predicted = self.model(
                 batch_x1, batch_tfidf, batch_y)
             loss_hidden, loss_ae = self.loss(x_hidden, y_hidden,
                                              y_predicted, batch_y)
             loss = loss_hidden+self.lamda*loss_ae
             losses.append(loss.item())
             loss_hidden_list.append(loss_hidden.item())
             loss_ae_list.append(loss_ae.item())
     return np.mean(losses), np.mean(loss_hidden_list), np.mean(loss_ae_list)
Example #23
def mean_importance(model, dataset, loss, batch_size, bar=False):
    '''
    Calculate feature importance by measuring performance reduction when
    features are imputed with their mean value.

    Args:
      model: PyTorch model. Must be callable, likely inherits from nn.Module.
      dataset: PyTorch dataset, such as data.utils.TabularDataset.
      loss: string descriptor of loss function ('mse', 'cross entropy').
      batch_size: number of examples to be processed at once.
      bar: whether to display progress bar.
    '''
    # Setup.
    device = next(model.parameters()).device
    input_size = model.input_size
    loader = DataLoader(dataset,
                        batch_sampler=BatchSampler(SequentialSampler(dataset),
                                                   batch_size=batch_size,
                                                   drop_last=False))
    loss_fn = utils.get_loss_pytorch(loss, reduction='none')
    scores = []

    # Verify model outputs are valid.
    utils.verify_pytorch_model(model, next(iter(loader))[0], loss)

    # Performance with all features.
    base_loss = validate_pytorch(
        model, loader, utils.get_loss_pytorch(loss, reduction='mean')).item()

    # For imputing with mean.
    imputation = utils.ReferenceImputation(
        torch.mean(torch.tensor(dataset.data), dim=0))

    if bar:
        bar = tqdm(total=len(dataset) * input_size)
    with torch.no_grad():
        for ind in range(input_size):
            # Setup.
            score = 0
            N = 0

            for x, y in loader:
                # Move to GPU.
                n = len(x)
                x = x.to(device=device)
                y = y.to(device=device)

                # Impute with mean and make predictions.
                y_hat = model(imputation.impute_ind(x, ind))

                # Measure loss and compute average.
                loss = torch.mean(loss_fn(y_hat, y))
                score = (score * N + loss * n) / (N + n)
                N += n
                if bar:
                    bar.update(n)

            scores.append(score)

    return (torch.stack(scores) - base_loss).cpu().data.numpy()
Example #24
    def __init__(self,
                 vf_dataset,
                 batch_size=8,
                 shuffle=False,
                 drop_last=False,
                 *args,
                 **kwargs):

        self.vf_dataset = vf_dataset
        self.batch_size = batch_size
        self.shuffle = shuffle

        self.workers = vf_dataset.workers

        self.batch_samplers = {}
        for worker in self.workers:
            data_range = range(len(list(self.vf_dataset.datasets.values())))
            if shuffle:
                sampler = RandomSampler(data_range)
            else:
                sampler = SequentialSampler(data_range)
            batch_sampler = BatchSampler(sampler, self.batch_size, drop_last)
            self.batch_samplers[worker] = batch_sampler

        single_loaders = []
        # `vfd` was undefined here; use the dataset stored on the instance instead.
        for k in self.vf_dataset.datasets.keys():
            single_loaders.append(
                SinglePartitionDataLoader(
                    self.vf_dataset.datasets[k],
                    batch_sampler=self.batch_samplers[k]))

        self.single_loaders = single_loaders
Example #25
    def generate_batch(self, episodes, mode):
        total_steps = len(
            episodes)  # How many samples will be generated in total
        print("Total Steps: {}".format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        if mode == "test" or mode == "val":
            BS = len(episodes)
        else:
            BS = 16
        sampler = BatchSampler(
            RandomSampler(range(len(episodes)), replacement=False),
            BS,
            drop_last=False,
        )

        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            ts_number = torch.LongTensor(indices)
            i = 0
            subjects = []
            for episode in episodes_batch:
                # Get all samples from this episode
                subjects.append(episode)
            yield torch.stack(subjects).to(self.device), ts_number.to(
                self.device)
Example #26
 def _compute_per_element_grads(self, theta_init):
     self.model.load_state_dict(theta_init)
     batch_wise_indices = list(BatchSampler(SequentialSampler(np.arange(self.N_trn)),
                                            self.batch_size, drop_last=False))
     cnt = 0
     for batch_idx in batch_wise_indices:
         inputs = torch.cat(
             [self.trainset[x][0].view(-1, 1, self.trainset[x][0].shape[1], self.trainset[x][0].shape[2]) for x in
              batch_idx], dim=0).type(torch.float)
         targets = torch.tensor([self.trainset[x][1] for x in batch_idx])
         inputs, targets = inputs.to(self.device), targets.to(self.device, non_blocking=True)
         if cnt == 0:
             with torch.no_grad():
                 data = F.softmax(self.model(inputs), dim=1)
             tmp_tensor = torch.zeros(len(inputs), self.num_classes).to(self.device)
             tmp_tensor.scatter_(1, targets.view(-1, 1), 1)
             outputs = tmp_tensor
             cnt = cnt + 1
         else:
             cnt = cnt + 1
             with torch.no_grad():
                 data = torch.cat((data, F.softmax(self.model(inputs), dim=1)), dim=0)
             tmp_tensor = torch.zeros(len(inputs), self.num_classes).to(self.device)
             tmp_tensor.scatter_(1, targets.view(-1, 1), 1)
             outputs = torch.cat((outputs, tmp_tensor), dim=0)
     grads_vec = data - outputs
     torch.cuda.empty_cache()
     print("Per Element Gradient Computation is Completed")
     self.grads_per_elem = grads_vec
Example #27
    def generate_batch(self, episodes):
        total_steps = sum([len(e) for e in episodes])
        print('Total Steps: {}'.format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                             replacement=True,
                                             num_samples=total_steps),
                               self.batch_size,
                               drop_last=True)
        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
            for episode in episodes_batch:
                # Get one sample from this episode
                t = np.random.randint(0, len(episode))
                t_hat = np.random.randint(0, len(episode))
                x_t.append(episode[t])

                x_tprev.append(episode[t - 1])
                ts.append([t])
            yield torch.stack(x_t).float().to(
                self.device) / 255., torch.stack(x_tprev).float().to(
                    self.device) / 255.
Example #28
    def __init__(self, data_source, batch_size, num_replicas=None, rank=None):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError(
                    "Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError(
                    "Requires distributed package to be available")
            rank = dist.get_rank()

        self.data_source = data_source
        self.batch_size = batch_size
        self.num_replicas = num_replicas
        assert len(self.data_source) % self.batch_size == 0

        self.batch_sampler = list(
            BatchSampler(SequentialSampler(range(len(self.data_source))),
                         batch_size=self.batch_size,
                         drop_last=True))

        self.num_samples = int(
            math.floor(len(self.batch_sampler) * 1.0 / self.num_replicas))

        self.total_size = self.num_samples * self.num_replicas

        # print("sample log --------------", self.total_size, len(self.batch_sampler))
        self.random_id_sampler = torch.randperm(self.total_size).tolist()

        self.rank = rank
        self.epoch = 0
Example #29
    def generate_batch(self, episodes):
        total_steps = sum([len(e) for e in episodes])
        print('Total Steps: {}'.format(total_steps))
        # Episode sampler
        # Sample `num_samples` episodes then batchify them with `self.batch_size` episodes per batch
        sampler = BatchSampler(RandomSampler(range(len(episodes)),
                                             replacement=True,
                                             num_samples=total_steps),
                               self.batch_size,
                               drop_last=True)
        for indices in sampler:
            episodes_batch = [episodes[x] for x in indices]
            x_t, x_tprev, x_that, ts, thats = [], [], [], [], []
            for episode in episodes_batch:
                # Get one sample from this episode
                t = np.random.randint(0, len(episode))
                t_hat = np.random.randint(0, len(episode))
                x_t.append(episode[t])

                # Apply the same transform to x_{t-1} and x_{t_hat}
                # https://github.com/pytorch/vision/issues/9#issuecomment-383110707
                # Use numpy's random seed because Cutout uses np
                # seed = random.randint(0, 2 ** 32)
                # np.random.seed(seed)
                x_tprev.append(episode[t - 1])
                # np.random.seed(seed)
                x_that.append(episode[t_hat])

                ts.append([t])
                thats.append([t_hat])
            yield torch.stack(x_t).float().to(self.device) / 255., torch.stack(x_tprev).float().to(self.device) / 255., \
                  torch.stack(x_that).float().to(self.device) / 255., torch.Tensor(ts).to(self.device), \
                  torch.Tensor(thats).to(self.device)
Example #30
    def fit(self, w, xu, nb_iter=100, batch_size=None,
            lr=1e-3, method='adam', verbose=False, **kwargs):

        l2 = kwargs.get('l2', 0.)
        if method == 'adam':
            self.optim = Adam(self.parameters(), lr=lr, weight_decay=l2)
        else:
            momentum = kwargs.get('momentum', 0.)
            self.optim = SGD(self.parameters(), lr=lr, weight_decay=l2, momentum=momentum)

        set_size = xu.shape[0]
        batch_size = set_size if batch_size is None else batch_size
        batches = list(BatchSampler(SubsetRandomSampler(range(set_size)), batch_size, True))

        for n in range(nb_iter):
            for batch in batches:
                self.optim.zero_grad()
                loss = - self.elbo(w[batch], xu[batch], batch_size, set_size)
                loss.backward()
                self.optim.step()

            if verbose:
                if n % 100 == 0:
                    print('Epoch: {}/{}.............'.format(n, nb_iter), end=' ')
                    print("Loss: {:.4f}".format(loss))