Code example #1
    def __init__(self, ncols: int = 80) -> None:
        self.writer = None
        self._ncols = ncols
        self._temporal_memory = {}

        liblog._set_tqdm_handler()
        liblog._set_tqdm_print()
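
The `liblog._set_tqdm_handler()` and `liblog._set_tqdm_print()` calls above are presumably there to route logging (and printing) through tqdm so that messages do not break an active progress bar. Their bodies are not shown in these excerpts; the following is a minimal sketch of that general pattern, using only the public `tqdm.tqdm.write` API and the standard `logging` module, and is not homura's actual implementation.

import logging

import tqdm


class _TqdmLoggingHandler(logging.Handler):
    # Forwards log records to tqdm.write so that log lines are printed
    # above an active progress bar instead of corrupting it.
    def emit(self, record: logging.LogRecord) -> None:
        try:
            tqdm.tqdm.write(self.format(record))
        except Exception:
            self.handleError(record)


def set_tqdm_handler(level: int = logging.INFO) -> None:
    # Illustrative helper name; homura's _set_tqdm_handler may differ.
    handler = _TqdmLoggingHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s [%(name)s] %(message)s"))
    root = logging.getLogger()
    root.addHandler(handler)
    root.setLevel(level)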
Code example #2
File: reporters.py  Project: matrixgame2018/homura
    def __init__(self, iterator: Iterable, verb: bool = False):

        super(TQDMReporter, self).__init__()
        self.writer = tqdm.tqdm(iterator,
                                ncols=80) if is_master() else iterator
        self._verb = verb
        self._logger = liblog.get_logger(__name__)
        self._length = len(iterator)
        liblog._set_tqdm_handler()
        liblog._set_tqdm_print()
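
Judging from the constructor above, this `TQDMReporter` wraps any sized iterable: on the master process it becomes a `tqdm` bar fixed at 80 columns, other ranks keep the raw iterator so only rank 0 renders output, and `len(iterator)` is stored, so the argument must define `__len__`. A minimal construction sketch follows; the import path is an assumption, since the excerpt only names the file `reporters.py`. Note that code example #3 below calls `TQDMReporter(ncols=...)`, which corresponds to a different, reporter-list based version of the class rather than this constructor.

from homura.reporters import TQDMReporter  # import path assumed

# range(100) is a sized iterable, so the len(iterator) call in __init__ succeeds.
reporter = TQDMReporter(range(100), verb=True)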
Code example #3
File: trainers.py  Project: ChenBindtp/homura
    def __init__(self,
                 model: nn.Module or Dict[str, nn.Module],
                 optimizer: Optional[Partial or Optimizer
                                     or Dict[str, Optimizer]],
                 loss_f: Optional[Callable or Dict[str, Callable]] = None,
                 *,
                 reporters: Optional[_ReporterBase
                                     or List[_ReporterBase]] = None,
                 scheduler: Optional[Partial or Scheduler
                                     or Dict[str, Scheduler]] = None,
                 update_scheduler_by_epoch: bool = True,
                 device: Optional[torch.device or str] = None,
                 verb: bool = True,
                 use_cudnn_benchmark: bool = True,
                 use_cuda_nonblocking: bool = False,
                 use_horovod: bool = False,
                 logger=None,
                 use_sync_bn: bool = False,
                 tqdm_ncols: int = 80,
                 **kwargs):

        if kwargs.get("callbacks"):
            raise DeprecationWarning(
                "callback is deprecated, if you need, use homura before v2020.8"
            )

        self.logger = logger or get_logger(__name__)

        self.device = device or (torch.device(
            GPU) if torch.cuda.is_available() else torch.device(CPU))

        # setup for distributed
        self._use_sync_bn = use_sync_bn
        if is_distributed():
            if self._use_sync_bn:
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
                self.logger.info(
                    "BNs of model are converted to nn.SyncBatchNorm")

            rank = get_local_rank()
            torch.cuda.set_device(rank)
            if get_global_rank() > 0:
                # to avoid overwriting
                verb = False

        # setup model
        if isinstance(model, nn.Module):
            self.model = model
        elif isinstance(model, dict):
            self.model = nn.ModuleDict(model)
        else:
            raise TypeError(
                f"Unknown type for `model`. Expected nn.Module or Dict[str, Module], but got {type(model)}"
            )

        if GPU in str(self.device):
            self.model.to(self.device)
            torch.backends.cudnn.benchmark = use_cudnn_benchmark
            self._cuda_nonblocking = use_cuda_nonblocking
            self.logger.debug(
                f"cuda: True, cudnn.benchmark: {use_cudnn_benchmark}, "
                f"cuda.nonblocking: {use_cuda_nonblocking}")
        else:
            self._cuda_nonblocking = False
            # usually, this is not expected
            self.logger.info(
                f"cuda: False (torch.cuda.is_available()={torch.cuda.is_available()})"
            )

        if not use_horovod and is_distributed():
            self.model = nn.parallel.DistributedDataParallel(self.model,
                                                             device_ids=[rank])

        # self.accessible_model is useful, e.g., for checkpointing
        if isinstance(self.model,
                      nn.parallel.DistributedDataParallel) or isinstance(
                          self.model, nn.DataParallel):
            self.accessible_model = self.model.module
        else:
            self.accessible_model = self.model

        self.loss_f = loss_f
        self._verb = verb

        # setup optimizer and scheduler
        self.optimizer = optimizer
        self.scheduler = scheduler
        self._update_scheduler_by_epoch = update_scheduler_by_epoch
        self.set_optimizer()
        self.set_scheduler()

        if use_horovod:
            if not is_horovod_available():
                raise RuntimeError("horovod is not available!")
            import horovod.torch as hvd

            hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)
            self.optimizer = hvd.DistributedOptimizer(
                self.optimizer, named_parameters=self.model.named_parameters())

        if reporters is not None and not isinstance(reporters, Iterable):
            reporters = [reporters]
        reporters = reporters or []

        if not any([isinstance(rep, TQDMReporter) for rep in reporters]):
            # if reporters does not already contain a TQDMReporter
            reporters.append(TQDMReporter(ncols=tqdm_ncols))
        self.reporter = ReporterList(reporters)

        # called via property
        # _step and _epoch are set to -1 because they are incremented before each iteration and epoch
        self._step = -1
        self._epoch = -1
        self._is_train = True

        # to nest, leave=False (https://github.com/tqdm/tqdm/blob/master/examples/simple_examples.py#L19)
        self._tqdm = lambda x: x
        if verb:
            self._tqdm = Partial(tqdm, ncols=tqdm_ncols, leave=False)
            _set_tqdm_print()

        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(self.device)
            if isinstance(v, nn.Module):
                v.to(self.device)
            setattr(self, k, v)
            self.logger.debug(f"trainer sets {k} as a new attribute")
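
The constructor above performs the bulk of the trainer setup: device selection, optional SyncBatchNorm conversion and DistributedDataParallel wrapping under a distributed launch, optional Horovod broadcasting, instantiation of the optimizer and scheduler from `Partial` objects, automatic insertion of a `TQDMReporter` when none is supplied, and promotion of any extra keyword arguments to device-placed attributes. A hedged construction sketch follows; the concrete subclass name `SupervisedTrainer` and the import paths are assumptions (the excerpt only shows a base `__init__` in `trainers.py`), so treat this as an illustration of the signature rather than documented usage.

from functools import partial

import torch.nn as nn
import torch.optim as optim

from homura.reporters import TQDMReporter      # import path assumed
from homura.trainers import SupervisedTrainer  # concrete subclass name assumed

model = nn.Linear(10, 2)
trainer = SupervisedTrainer(
    model,
    partial(optim.SGD, lr=0.1),           # a Partial; instantiated later by set_optimizer()
    nn.CrossEntropyLoss(),
    reporters=[TQDMReporter(ncols=100)],   # otherwise a TQDMReporter is appended automatically
    ema_model=nn.Linear(10, 2),            # extra kwargs become attributes moved to self.device
)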
Code example #4
File: trainers.py  Project: matrixgame2018/homura
    def __init__(self,
                 model: nn.Module or Dict[str, nn.Module],
                 optimizer: Optional[Partial or Optimizer
                                     or Dict[str, Optimizer]],
                 loss_f: Optional[Callable or Dict[str, Callable]],
                 *,
                 callbacks: Optional[Iterable[Callback]] = None,
                 scheduler: Optional[Partial or Scheduler
                                     or Dict[str, Scheduler]] = None,
                 update_scheduler_by_epoch: bool = True,
                 device: Optional[torch.device or str] = None,
                 verb: bool = True,
                 use_cudnn_benchmark: bool = True,
                 use_cuda_nonblocking: bool = False,
                 use_horovod: bool = False,
                 logger=None,
                 use_sync_bn: bool = False,
                 tqdm_ncols: int = 80,
                 **kwargs):

        if logger is None:
            logger = get_logger(__name__)
        self.logger = logger

        if device is None:
            self.device = torch.device(
                GPU) if torch.cuda.is_available() else torch.device(CPU)
        else:
            self.device = device

        if use_horovod and not is_horovod_available():
            raise RuntimeError('horovod is not available!')

        if is_distributed():
            if use_sync_bn:
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

            rank = get_local_rank()
            torch.cuda.set_device(rank)
            if get_global_rank() > 0:
                # to avoid overwriting
                verb = False

        if isinstance(model, nn.Module):
            self.model = model
        elif isinstance(model, dict):
            self.model = nn.ModuleDict(model)
        else:
            raise TypeError(
                f"Unknown type for `model`. Expected nn.Module or Dict[str, Module] but got {type(model)}"
            )

        if GPU in str(self.device):
            self.model.to(self.device)
            torch.backends.cudnn.benchmark = use_cudnn_benchmark
            self._cuda_nonblocking = use_cuda_nonblocking
            self.logger.debug(
                f"cuda: True, cudnn.benchmark: {use_cudnn_benchmark}, "
                f"cuda.nonblocking: {use_cuda_nonblocking}")
        else:
            self._cuda_nonblocking = False
            # usually, this is not expected
            self.logger.info(
                f"cuda: False (torch.cuda.is_available()={torch.cuda.is_available()})"
            )

        if not use_horovod and is_distributed():
            self.model = nn.parallel.DistributedDataParallel(self.model,
                                                             device_ids=[rank])

        if isinstance(self.model,
                      nn.parallel.DistributedDataParallel) or isinstance(
                          self.model, nn.DataParallel):
            self.accessible_model = self.model.module
        else:
            self.accessible_model = self.model

        self.optimizer = optimizer
        self.scheduler = scheduler
        self._callbacks = callbacks
        self.update_scheduler_by_epoch = update_scheduler_by_epoch

        self.set_optimizer()
        self.set_scheduler()
        self._set_callbacks(callbacks)

        if use_horovod:
            import horovod.torch as hvd

            hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)
            self.optimizer = hvd.DistributedOptimizer(
                self.optimizer, named_parameters=self.model.named_parameters())

        self.loss_f = loss_f
        self._verb = verb

        # called via property
        # _step and _epoch are set to -1 because they are incremented before each iteration and epoch!
        self._step = -1
        self._epoch = -1
        self._is_train = True
        # to nest, leave=False (https://github.com/tqdm/tqdm/blob/master/examples/simple_examples.py#L19)
        self._tqdm = lambda x: x
        if verb:
            self._tqdm = Partial(tqdm, ncols=tqdm_ncols, leave=False)
            _set_tqdm_print()

        _map_base = {
            MODEL: self.accessible_model,
            OPTIMIZER: self.optimizer,
            SCHEDULER: self.scheduler,
            TRAINER: self
        }
        self._iteration_map = TensorMap(**_map_base.copy())
        self._epoch_map = TensorMap(**_map_base.copy())
        self._all_map = TensorMap(**_map_base.copy())

        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(self.device)
            if isinstance(v, nn.Module):
                v.to(self.device)
            setattr(self, k, v)

        self._callbacks.before_all(self._all_map)
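
This older variant differs from code example #3 mainly in that it still accepts `callbacks` (which #3 rejects with a `DeprecationWarning`, pointing users to homura before v2020.8) and wires them up through `TensorMap`s passed to `before_all`. Both versions end with the same loop that turns unrecognized keyword arguments into attributes, moving tensors and modules to the trainer's device first. Below is an isolated, runnable sketch of just that behaviour; the standalone class name is illustrative and not part of homura.

import torch
import torch.nn as nn


class _KwargsToAttrs:
    # Mirrors the trailing loop in both trainer constructors: unknown keyword
    # arguments become attributes, and tensors/modules are moved to the
    # trainer's device before being attached.
    def __init__(self, device: torch.device, **kwargs) -> None:
        self.device = device
        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(self.device)
            if isinstance(v, nn.Module):
                v.to(self.device)
            setattr(self, k, v)


obj = _KwargsToAttrs(torch.device("cpu"),
                     ema_model=nn.Linear(4, 4),
                     temperature=torch.tensor(0.5))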