Example #1
    def test_horovod_broadcast_rank_error(self):
        """Test that the broadcast returns an error if different ranks
           specify different root rank."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            self.skipTest("Only one worker available")

        ctx = self._current_context()
        shape = (17, 17, 17)
        tensor = mx.nd.ones(shape=shape, ctx=ctx)
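        # Each worker passes its own rank as the root below, so the workers
        # disagree on the root rank and Horovod is expected to raise.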
        try:
            output = hvd.broadcast(tensor, root_rank=rank)
            output.wait_to_read()
            assert False, 'hvd.broadcast did not throw rank error'
        except (MXNetError, RuntimeError):
            pass
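
For contrast, a minimal sketch of the call pattern that succeeds, assuming a
multi-worker launch (e.g. via horovodrun): every rank passes the same
root_rank.

import mxnet as mx
import horovod.mxnet as hvd

hvd.init()
# Every worker fills its tensor with its own rank, then all agree on root 0.
tensor = mx.nd.full((17, 17, 17), hvd.rank())
result = hvd.broadcast(tensor, root_rank=0)
result.wait_to_read()
# Every rank now holds rank 0's tensor, i.e. all zeros.
assert (result.asnumpy() == 0).all()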
Example #2
    def test_horovod_broadcast_error(self):
        """Test that the broadcast returns an error if any dimension besides
           the first is different among the tensors being broadcasted."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            self.skipTest("Only one worker available")

        ctx = self._current_context()
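        # The second dimension differs across workers ((17, 1), (17, 2), ...),
        # so the broadcast below must fail.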
        shape = (17, rank + 1)
        tensor = mx.nd.ones(shape=shape, ctx=ctx)

        try:
            output = hvd.broadcast(tensor, 0)
            output.wait_to_read()
            assert False, 'hvd.broadcast did not throw error'
        except (MXNetError, RuntimeError):
            pass
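
Conversely, the broadcast succeeds when the shape is identical on every rank.
A hedged sketch under the same multi-worker assumption:

import mxnet as mx
import horovod.mxnet as hvd

hvd.init()
shape = (17, 4)                       # same on all ranks, unlike (17, rank + 1)
tensor = mx.nd.ones(shape) * hvd.rank()
result = hvd.broadcast(tensor, root_rank=0)
result.wait_to_read()                 # no error: shapes match across ranks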
Example #3
    def broadcast(self, key, value, out, priority=0):
        """ Broadcast the `value` NDArray at rank 0 to all ranks

        Parameters
        ----------
        key : str, or int
            The key is used to name the tensor for the broadcast. Its
            usage is different from that of parameter servers.

        value : NDArray
            The tensor that is to be broadcasted.

        out : NDArray, or list of NDArray
            Output tensor(s) that receive the value broadcast from the root process.

        priority : int, optional
            The priority of the operation.
            Higher-priority operations are likely to be executed before lower-priority ones.

        Examples
        --------
        >>> a = mx.nd.ones(shape)
        >>> b = mx.nd.zeros(shape)
        >>> kv.broadcast('2', value=a, out=b)
        >>> print(b.asnumpy())
        [[ 1.  1.  1.]
         [ 1.  1.  1.]]
        """
        import horovod.mxnet as hvd

        out = out if isinstance(out, list) else [out]

        # TODO (lnyuan): need to copy data to each device memory
        for o in out:
            o[:] = hvd.broadcast(tensor=value,
                                 root_rank=0,
                                 name=str(key),
                                 priority=priority)
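
A hedged usage sketch for this method: the _HorovodKVStore class below is a
hypothetical, minimal stand-in for the object that defines broadcast above,
and the script assumes a multi-worker launch via horovodrun.

import mxnet as mx
import horovod.mxnet as hvd

class _HorovodKVStore:
    # Hypothetical stand-in exposing the broadcast method shown above.
    def broadcast(self, key, value, out, priority=0):
        out = out if isinstance(out, list) else [out]
        for o in out:
            o[:] = hvd.broadcast(tensor=value, root_rank=0,
                                 name=str(key), priority=priority)

hvd.init()
kv = _HorovodKVStore()
a = mx.nd.ones((2, 3))
b = mx.nd.zeros((2, 3))
kv.broadcast('2', value=a, out=b)
b.wait_to_read()
print(b.asnumpy())  # [[1. 1. 1.] [1. 1. 1.]] on every rank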
Example #4
import horovod.mxnet as hvd

def broadcast_parameters(params):
    """Broadcast every tensor in `params` from rank 0 to all ranks."""
    rank_0_dict = {}
    # Run broadcasts; each key also serves as the Horovod tensor name.
    for key, tensor in params.items():
        rank_0_dict[key] = hvd.broadcast(tensor, 0, key)
    return rank_0_dict
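
A hedged usage sketch, assuming broadcast_parameters is in scope and the
script is launched with horovodrun; afterwards every rank holds rank 0's copy
of each parameter. The parameter names and shapes are illustrative only.

import mxnet as mx
import horovod.mxnet as hvd

hvd.init()
params = {'weight': mx.nd.random.uniform(shape=(4, 4)),
          'bias': mx.nd.random.uniform(shape=(4,))}
synced = broadcast_parameters(params)
for name, tensor in synced.items():
    tensor.wait_to_read()  # now identical across all ranks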