Example 1
    def __init__(self, params, optimizer, optimizer_params=None,
                 compression=Compression.none,
                 gradient_predivide_factor=1.0, prefix=None,
                 num_groups=0):
        self._compression = compression

        if gradient_predivide_factor != 1.0 and rocm_built():
            raise ValueError('gradient_predivide_factor not supported yet with ROCm')
        if isinstance(optimizer, DistributedOptimizer):
            optimizer = optimizer._optimizer
            warnings.warn("DistributedTrainer does not take DistributedOptimizer "
                          "as its optimizer. We have unwrapped it for you.")

        # To ensure consistent parameter ordering across workers, sort params before
        # passing them to the base Trainer constructor. The same logic has been in
        # trainer.py since v1.6, but we repeat it here for backwards compatibility.
        if isinstance(params, dict):
            params = OrderedDict(params)
        elif isinstance(params, (list, tuple)):
            params = sorted(params)

        super(DistributedTrainer, self).__init__(
            params, optimizer, optimizer_params=optimizer_params, kvstore=None)

        # _scale is used to check and set rescale_grad for the optimizer in the
        # Trainer.step() function. Normalizing it by the Horovod size, which is
        # equivalent to averaging in allreduce, gives better performance.
        self._scale *= (gradient_predivide_factor / size())
        self._gradient_predivide_factor = gradient_predivide_factor
        assert prefix is None or isinstance(prefix, str)
        self._prefix = prefix if prefix else ""
        self._num_groups = num_groups
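
A minimal usage sketch for this constructor, assuming Horovod's MXNet bindings (horovod.mxnet) and a small Gluon model; the model, optimizer name, and hyperparameters below are illustrative only, not part of the example above:

import mxnet as mx
import horovod.mxnet as hvd

hvd.init()

# Hypothetical one-layer model; any Gluon block works the same way.
net = mx.gluon.nn.Dense(10)
net.initialize()

# Broadcast initial parameters so all workers start from the same state.
hvd.broadcast_parameters(net.collect_params(), root_rank=0)

# The constructor above sorts the params, unwraps any DistributedOptimizer,
# and folds gradient_predivide_factor / hvd.size() into the gradient scale.
trainer = hvd.DistributedTrainer(net.collect_params(), 'sgd',
                                 {'learning_rate': 0.01})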
Example 2
    def __init__(self,
                 params,
                 optimizer,
                 optimizer_params=None,
                 gradient_predivide_factor=1.0,
                 prefix=None):
        if gradient_predivide_factor != 1.0 and rocm_built():
            raise ValueError(
                'gradient_predivide_factor not supported yet with ROCm')
        if isinstance(optimizer, DistributedOptimizer):
            optimizer = optimizer._optimizer
            warnings.warn(
                "DistributedTrainer does not take DistributedOptimizer "
                "as its optimizer. We have unwrapped it for you.")

        super(DistributedTrainer, self).__init__(
            params, optimizer, optimizer_params=optimizer_params, kvstore=None)

        # _scale is used to check and set rescale_grad for the optimizer in the
        # Trainer.step() function. Normalizing it by the Horovod size, which is
        # equivalent to averaging in allreduce, gives better performance.
        self._scale *= (gradient_predivide_factor / size())
        self._gradient_predivide_factor = gradient_predivide_factor
        assert prefix is None or isinstance(prefix, str)
        self._prefix = prefix if prefix else ""
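
The _scale adjustment above is the core of the averaging trick. A tiny arithmetic sketch, with a hypothetical world size, of why dividing by hvd.size() turns the summing allreduce into an average:

# Hypothetical values for illustration; hvd.size() is assumed to be 4.
world_size = 4
gradient_predivide_factor = 1.0
base_rescale_grad = 1.0  # the optimizer's original rescale_grad

# Each worker scales its gradient by 1/4 before the summing allreduce,
# so the sum over 4 workers equals the mean gradient.
effective_scale = base_rescale_grad * (gradient_predivide_factor / world_size)
assert effective_scale == 0.25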
Example 3
    def __init__(self, optimizer, gradient_predivide_factor=1.0):
        if gradient_predivide_factor != 1.0 and rocm_built():
            raise ValueError(
                'gradient_predivide_factor not supported yet with ROCm')

        self._optimizer = optimizer
        # Normalizing rescale_grad by the Horovod size, which is equivalent to
        # averaging in allreduce, gives better performance.
        self._optimizer.rescale_grad *= (gradient_predivide_factor / size())
        self._gradient_predivide_factor = gradient_predivide_factor
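
A minimal sketch of wrapping a plain MXNet optimizer with this class; the optimizer name and learning rate are placeholders, and hvd.init() is assumed to have been called before training starts:

import mxnet as mx
import horovod.mxnet as hvd

hvd.init()

# Plain MXNet optimizer; 'sgd' and the learning rate are illustrative.
opt = mx.optimizer.create('sgd', learning_rate=0.01)

# The wrapper divides rescale_grad by hvd.size(), so the summing allreduce
# of gradients yields an average across workers instead of a sum.
opt = hvd.DistributedOptimizer(opt)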