def _get_sampler(train_set,
                 test_set,
                 val_set,
                 train_sampler,
                 test_sampler,
                 val_sampler,
                 start_epoch):
    # train sampler: DistributedSampler when running distributed, otherwise a shuffling RandomSampler
    if train_sampler is None:
        if is_distributed():
            train_sampler = DistributedSampler(train_set,
                                               num_replicas=get_world_size(),
                                               rank=get_global_rank())
            train_sampler.set_epoch(start_epoch)
        else:
            train_sampler = RandomSampler(train_set, True)
    else:
        # user-supplied samplers are expected to be callables taking the dataset
        train_sampler = train_sampler(train_set)

    # test sampler: only needed explicitly in the distributed case
    if test_sampler is None:
        if is_distributed():
            test_sampler = DistributedSampler(test_set,
                                              num_replicas=get_world_size(),
                                              rank=get_global_rank())
    else:
        test_sampler = test_sampler(test_set)

    # validation sampler: optional, mirrors the train/test handling
    if val_set is not None:
        if val_sampler is None and is_distributed():
            val_sampler = DistributedSampler(val_set,
                                             num_replicas=get_world_size(),
                                             rank=get_global_rank())
            val_sampler.set_epoch(start_epoch)
        elif val_sampler is not None:
            val_sampler = val_sampler(val_set)

    return train_sampler, test_sampler, val_sampler
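# Usage sketch for `_get_sampler` (illustrative only, not part of the original code):
# in a non-distributed run with no custom samplers, the helper returns a shuffling
# RandomSampler for training and leaves the test/val samplers as None, so DataLoader
# falls back to sequential iteration. Names below mirror the surrounding module.
def _example_get_sampler(train_set, test_set):
    train_sampler, test_sampler, val_sampler = _get_sampler(
        train_set, test_set, None,  # no validation split
        train_sampler=None, test_sampler=None, val_sampler=None,
        start_epoch=0)
    return train_sampler, test_sampler, val_sampler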
def main(cfg: Config):
    if cfg.enable_accimage:
        enable_accimage()

    model = resnet50()
    optimizer = optim.SGD(lr=cfg.base_lr * cfg.batch_size * get_world_size() / 256,
                          momentum=0.9,
                          weight_decay=1e-4,
                          multi_tensor=True)
    scheduler = lr_scheduler.MultiStepLR([30, 60, 90])
    train_loader, test_loader = DATASET_REGISTRY("imagenet")(
        cfg.batch_size,
        train_size=cfg.batch_size * 50 if cfg.debug else None,
        test_size=cfg.batch_size * 50 if cfg.debug else None,
        num_workers=cfg.num_workers)

    use_multi_gpus = not is_distributed() and torch.cuda.device_count() > 1
    with SupervisedTrainer(model,
                           optimizer,
                           F.cross_entropy,
                           reporters=[reporters.TensorboardReporter(".")],
                           scheduler=scheduler,
                           data_parallel=use_multi_gpus,
                           use_amp=cfg.use_amp,
                           use_cuda_nonblocking=True,
                           use_sync_bn=cfg.use_sync_bn,
                           report_accuracy_topk=5) as trainer:

        for _ in trainer.epoch_range(cfg.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()

        print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
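# Note on the learning rate above: it follows the linear scaling rule,
# effective_lr = base_lr * global_batch_size / 256, where the global batch size is the
# per-process batch size times the world size. A minimal sketch with illustrative values
# (base_lr=0.1, batch_size=64 per process, world_size=4):
def _example_lr_scaling(base_lr: float = 0.1, batch_size: int = 64, world_size: int = 4) -> float:
    # 0.1 * 64 * 4 / 256 == 0.1 for this particular configuration
    return base_lr * batch_size * world_size / 256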
def vision_loaders(name: str,
                   batch_size: int,
                   train_da: Optional[List] = None,
                   test_da: Optional[List] = None,
                   norm: Optional[List] = None,
                   val_size: int = 0,
                   download: bool = False,
                   num_workers: int = -1,
                   non_training_bs_factor=2,
                   distributed: bool = False,
                   drop_last: bool = False,
                   pin_memory: bool = True,
                   return_num_classes: bool = False,
                   test_batch_size: Optional[int] = None
                   ) -> Tuple:
    """ Get data loaders for registered vision datasets. homura expects datasets to be in
    `~/.torch/data/DATASET_NAME`. Link the path if necessary, e.g. `ln -s /original/path $HOME/.torch`.
    Datasets can be registered using `homura.vision.register_dataset`.

    :param name: name of the registered dataset
    :param batch_size: training batch size
    :param train_da: custom train-time data augmentation
    :param test_da: custom test-time data augmentation
    :param norm: custom normalization applied after train_da/test_da
    :param val_size: if `val_size > 0`, split a validation set off the training set
    :param download: download the dataset if needed
    :param num_workers: number of data-loading workers
    :param non_training_bs_factor: batch-size multiplier for the test/validation loaders
    :param distributed: use `DistributedSampler`s
    :param drop_last: drop the last incomplete batch
    :param pin_memory: pin host memory in the loaders
    :param return_num_classes: also return the number of classes as the last value
    :param test_batch_size: test-time batch size; defaults to `non_training_bs_factor * batch_size`
    :return: (train_loader, test_loader, [val_loader], [num_classes])
    """

    if name not in _DATASETS.keys():
        raise RuntimeError(f'Unknown dataset name {name}.')

    dataset = _DATASETS[name]
    train_set, test_set = dataset.instantiate(train_da, test_da, norm, download)
    if test_batch_size is None:
        test_batch_size = non_training_bs_factor * batch_size

    if val_size > 0:
        train_set, val_set = _split_dataset(train_set, val_size)
        val_set.transform = test_set.transform

    samplers = [None, None, None]
    if distributed:
        import homura
        kwargs = dict(num_replicas=homura.get_world_size(), rank=homura.get_global_rank())
        samplers[0] = DistributedSampler(train_set, **kwargs)
        samplers[2] = DistributedSampler(test_set, **kwargs)
    else:
        samplers[0] = RandomSampler(train_set, True)

    shared_kwargs = dict(drop_last=drop_last,
                         num_workers=num_workers,
                         pin_memory=pin_memory,
                         collate_fn=dataset.collate_fn)
    train_loader = DataLoader(train_set, batch_size, sampler=samplers[0], **shared_kwargs)
    test_loader = DataLoader(test_set, test_batch_size, sampler=samplers[2], **shared_kwargs)
    ret = [train_loader, test_loader]
    if val_size > 0:
        if distributed:
            # sample the validation split, not the test set
            samplers[1] = DistributedSampler(val_set, **kwargs)
        val_loader = DataLoader(val_set, test_batch_size, sampler=samplers[1], **shared_kwargs)
        ret.append(val_loader)
    if return_num_classes:
        ret.append(dataset.num_classes)

    return tuple(ret)
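# Usage sketch for `vision_loaders` (illustrative; assumes a dataset registered under the
# name "cifar10" via `homura.vision.register_dataset`, all values are placeholders):
def _example_vision_loaders():
    train_loader, test_loader, num_classes = vision_loaders("cifar10",
                                                            batch_size=128,
                                                            num_workers=4,
                                                            return_num_classes=True)
    return train_loader, test_loader, num_classes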
def get_dataloader(self,
                   batch_size: int,
                   train_da: Optional[List] = None,
                   test_da: Optional[List] = None,
                   norm: Optional[List] = None,
                   train_size: Optional[int] = None,
                   test_size: Optional[int] = None,
                   val_size: Optional[int] = None,
                   download: bool = False,
                   num_workers: int = 1,
                   non_training_bs_factor=2,
                   drop_last: bool = False,
                   pin_memory: bool = True,
                   return_num_classes: bool = False,
                   test_batch_size: Optional[int] = None,
                   pre_default_train_da: Optional[List] = None,
                   post_default_train_da: Optional[List] = None,
                   post_norm_train_da: Optional[List] = None,
                   use_prefetcher: bool = False,
                   start_epoch: int = 0
                   ) -> (Tuple[DataLoader, DataLoader]
                         or Tuple[DataLoader, DataLoader, DataLoader]
                         or Tuple[DataLoader, DataLoader, int]
                         or Tuple[DataLoader, DataLoader, DataLoader, int]):
    """ Get DataLoaders. This automatically handles the distributed setting.

    :param batch_size: batch size
    :param train_da: data augmentation for training
    :param test_da: data augmentation for testing and validation
    :param norm: normalization applied after train_da and test_da
    :param train_size: size of the training dataset. If None, the full dataset is used.
    :param test_size: size of the test dataset. If None, the full dataset is used.
    :param val_size: size of the validation dataset, randomly split from the training dataset.
        If None, no validation loader is returned.
    :param download: download the dataset if needed
    :param num_workers: number of workers in the data loaders
    :param non_training_bs_factor: batch-size scale factor outside training. For example, testing
        needs no backward pass, so the batch size can typically be doubled.
    :param drop_last: drop the last incomplete batch
    :param pin_memory: pin host memory in the loaders
    :param return_num_classes: also return the number of classes as the last value
    :param test_batch_size: test-time batch size. If None, `non_training_bs_factor * batch_size` is used.
    :param pre_default_train_da: data augmentation applied before the default data augmentation
    :param post_default_train_da: data augmentation applied after the default data augmentation
    :param post_norm_train_da: data augmentation applied after normalization (i.e., norm)
    :param use_prefetcher: use a data prefetcher or not
    :param start_epoch: epoch at start time
    :return: train_loader, test_loader, [val_loader], [num_classes]
    """

    train_set, test_set, val_set = self.get_dataset(train_size,
                                                    test_size,
                                                    val_size,
                                                    train_da,
                                                    test_da,
                                                    norm,
                                                    download,
                                                    pre_default_train_da=pre_default_train_da,
                                                    post_default_train_da=post_default_train_da,
                                                    post_norm_train_da=post_norm_train_da)
    if test_batch_size is None:
        test_batch_size = non_training_bs_factor * batch_size

    samplers = [None, None, None]
    if is_distributed():
        import homura
        dist_sampler_kwargs = dict(num_replicas=homura.get_world_size(),
                                   rank=homura.get_global_rank())
        samplers[0] = DistributedSampler(train_set, **dist_sampler_kwargs)
        samplers[2] = DistributedSampler(test_set, **dist_sampler_kwargs)
        samplers[0].set_epoch(start_epoch)
        samplers[2].set_epoch(start_epoch)
    else:
        samplers[0] = RandomSampler(train_set, True)

    shared_kwargs = dict(drop_last=drop_last,
                         num_workers=num_workers,
                         pin_memory=pin_memory,
                         collate_fn=self.collate_fn)
    train_loader = DataLoader(train_set, batch_size, sampler=samplers[0], **shared_kwargs)
    test_loader = DataLoader(test_set, test_batch_size, sampler=samplers[2], **shared_kwargs)
    if use_prefetcher:
        train_loader = DataPrefetchWrapper(train_loader, start_epoch)
        test_loader = DataPrefetchWrapper(test_loader, start_epoch)
    ret = [train_loader, test_loader]

    if val_set is not None:
        if is_distributed():
            samplers[1] = DistributedSampler(val_set, **dist_sampler_kwargs)
            samplers[1].set_epoch(start_epoch)
        val_loader = DataLoader(val_set, test_batch_size, sampler=samplers[1], **shared_kwargs)
        if use_prefetcher:
            # wrap the validation loader itself, not the test loader
            val_loader = DataPrefetchWrapper(val_loader, start_epoch)
        ret.append(val_loader)

    if return_num_classes:
        ret.append(self.num_classes)

    return tuple(ret)
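# Usage sketch for `get_dataloader` (illustrative; `dataset` is a hypothetical registered
# dataset object exposing this method, e.g. one obtained from a dataset registry):
def _example_get_dataloader(dataset):
    # requesting a validation split and the class count yields four return values
    train_loader, test_loader, val_loader, num_classes = dataset.get_dataloader(
        batch_size=128,
        val_size=5_000,
        num_workers=4,
        return_num_classes=True)
    return train_loader, test_loader, val_loader, num_classes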
def __post_init__(self):
    assert self.optim.lr > self.optim.min_lr
    self.optim.lr *= self.data.batch_size * homura.get_world_size() / 512
    self.optim.min_lr *= self.data.batch_size * homura.get_world_size() / 512
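# Sketch of the effect of `__post_init__` above (illustrative values): both lr and min_lr
# are multiplied by the same factor, global_batch_size / 512, so the `lr > min_lr` invariant
# checked by the assertion is preserved after scaling.
def _example_post_init_scaling(lr: float = 0.5,
                               min_lr: float = 1e-4,
                               batch_size: int = 256,
                               world_size: int = 2):
    factor = batch_size * world_size / 512  # == 1.0 here, so the values are unchanged
    return lr * factor, min_lr * factor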