from torch.utils.data.distributed import DistributedSampler


def _add_extra_args_for_dataloader(self, dataset, opts, other_args=None):
    # Avoid a shared mutable default argument (`other_args={}` would be
    # reused across calls).
    if other_args is None:
        other_args = {}

    training_parameters = self._global_config.training_parameters
    dataset_type = self._dataset_type

    # Shuffle every split except the test set.
    other_args["shuffle"] = dataset_type != "test"

    if training_parameters.local_rank is not None and training_parameters.distributed:
        other_args["sampler"] = DistributedSampler(
            dataset, shuffle=other_args["shuffle"]
        )
        # DataLoader's "shuffle" option is mutually exclusive with "sampler";
        # let DistributedSampler handle shuffling and drop the key.
        other_args.pop("shuffle")

    # `get_batch_size` is the repo's helper returning the configured batch size.
    other_args["batch_size"] = get_batch_size()

    return other_args
def __len__(self):
    # This object iterates over batches, so its length is the number of
    # batches, not the number of individual samples.
    return self._total_length // get_batch_size()
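# ---------------------------------------------------------------------------
# Usage sketch (not part of the original source). It illustrates the two
# behaviors above: torch's DataLoader rejects `sampler` combined with
# `shuffle=True`, which is why "shuffle" is popped in
# _add_extra_args_for_dataloader, and the length of a batched loader is the
# sample count divided by the batch size. The toy dataset and the explicit
# num_replicas/rank (passed so no distributed process group needs to be
# initialized) are assumptions made for this demo only.
# ---------------------------------------------------------------------------
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

if __name__ == "__main__":
    dataset = TensorDataset(torch.arange(8))
    sampler = DistributedSampler(dataset, num_replicas=1, rank=0, shuffle=True)

    # Passing both raises: "sampler option is mutually exclusive with shuffle".
    try:
        DataLoader(dataset, sampler=sampler, shuffle=True)
    except ValueError as err:
        print(err)

    # Delegating shuffling to the sampler works, and len() counts batches.
    loader = DataLoader(dataset, sampler=sampler, batch_size=4)
    print(len(loader))  # 2 == 8 samples // batch size of 4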