Code Example #1
    def alltoall(self, xs):
        """A primitive of inter-process all-to-all function.

        This method tries to invoke all-to-all communication within the
        communicator. All processes in the communicator are expected to
        invoke ``alltoall()``. This method relies on mpi4py fast communication
        optimized for numpy arrays, as well as ``send()`` and ``recv()``.

        If the arrays in ``xs`` are numpy arrays, the returned arrays will
        also be allocated as numpy arrays. If they are cupy arrays, the
        returned arrays will be placed on the current device
        (``https://docs-cupy.chainer.org/en/stable/tutorial/basic.html#current-device``)
        regardless of which device the arguments are placed on at remote
        nodes.

        Args:
            xs (tuple of numpy/cupy array): Arrays to send, one per
                process. The length of the tuple must equal the
                communicator size.

        Returns:
            ys (tuple of numpy/cupy array):
                Received arrays. The length of the tuple equals
                the communicator size.
        """
        chainer.utils.experimental(
            'chainermn.communicators.MpiCommunicatorBase.alltoall')

        if len(xs) != self.size:
            raise ValueError(
                'The length of data must be same as communicator size.')

        # Type check.
        msgtypes = [_MessageType(x) for x in xs]
        for msgtype in msgtypes:
            _check_dtype('alltoall', msgtype)
        _check_dtypes_are_same(msgtypes)
        send_msgtype = msgtypes[0]

        msgtypes = self.mpi_comm.alltoall(msgtypes)
        _check_dtypes_are_same(msgtypes)
        recv_msgtype = msgtypes[0]

        # Collective communication.
        slens = [x.size for x in xs]
        xp = chainer.backend.get_array_module(*xs)
        sbuf = xp.hstack([x.reshape(-1) for x in xs])
        shapes = [msgtype.shapes[0] for msgtype in msgtypes]
        rlens = [chainer.utils.size_of_shape(s) for s in shapes]
        rbuf = xp.empty([sum(rlens)], dtype=msgtype.dtype)
        if xp is not numpy:
            sbuf = _memory_utility.get_device_memory_pointer(sbuf)
            chainer.cuda.Stream.null.synchronize()
        self.mpi_comm.Alltoallv(
            [sbuf, (slens, _cnt_to_dsp(slens)), _get_mpi_type(send_msgtype)],
            [_memory_utility.get_device_memory_pointer(rbuf),
             (rlens, _cnt_to_dsp(rlens)), _get_mpi_type(recv_msgtype)])
        ys = [rbuf[i:i + l].reshape(s)
              for i, l, s in zip(_cnt_to_dsp(rlens), rlens, shapes)]

        return tuple(ys)
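
Below is a minimal usage sketch of ``alltoall()``. It is illustrative only: it assumes the script is launched with ``mpiexec`` and that a CPU communicator created via ``chainermn.create_communicator('naive')`` is available; the communicator name and the array contents are assumptions, not part of the example above.

    # Hypothetical usage sketch; run with e.g. `mpiexec -n 4 python script.py`.
    import numpy as np
    import chainermn

    comm = chainermn.create_communicator('naive')

    # Prepare one array per destination rank; len(xs) must equal comm.size.
    xs = tuple(np.full((2, 3), comm.rank, dtype=np.float32)
               for _ in range(comm.size))

    # Every process must call alltoall(); ys[src] holds the array sent by
    # rank `src` to this process.
    ys = comm.alltoall(xs)
    assert len(ys) == comm.size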
Code Example #2
    def allgather(self, x):
        """A primitive of inter-process all-gather communication.

        This method invokes all-gather communication within the
        communicator. All processes in the communicator are expected to
        invoke ``allgather()``. Each process sends its array ``x`` and
        receives the arrays sent by all processes.

        Args:
            x (numpy/cupy array): Array to be gathered.

        Returns:
            ys (tuple of numpy/cupy array):
                Received arrays. The length of the tuple equals
                the communicator size.
        """
        chainer.utils.experimental(
            'chainermn.communicators.MpiCommunicatorBase.allgather')

        msgtype = _MessageType(x)
        _check_dtype('allgather', msgtype)

        msgtypes = self.mpi_comm.allgather(msgtype)
        _check_dtypes_are_same(msgtypes)

        # Type check.
        for msgtype in msgtypes:
            if msgtype.is_tuple:
                raise TypeError('allgather cannot handle tuple data')

            assert len(msgtype.shapes) == 1

        # Collective communication.
        xp = chainer.backend.get_array_module(x)
        shapes = [msgtype.shapes[0] for msgtype in msgtypes]
        sbuf = _memory_utility.array_to_buffer_object(
            x, _get_mpi_type(msgtype))
        rlens = [chainer.utils.size_of_shape(s) for s in shapes]
        rbuf = xp.empty([sum(rlens)], dtype=msgtype.dtype)
        if xp is not numpy:
            chainer.cuda.Stream.null.synchronize()
        self.mpi_comm.Allgatherv(
            sbuf,
            [_memory_utility.get_device_memory_pointer(rbuf),
             (rlens, _cnt_to_dsp(rlens)), _get_mpi_type(msgtype)])
        ys = [rbuf[i:i + l].reshape(s)
              for i, l, s in zip(_cnt_to_dsp(rlens), rlens, shapes)]

        return tuple(ys)
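
A minimal usage sketch of ``allgather()`` follows, under the same assumptions as above (launched with ``mpiexec``; the 'naive' communicator name is chosen only for illustration).

    # Hypothetical usage sketch: every rank contributes one array and
    # receives a tuple with one array per rank.
    import numpy as np
    import chainermn

    comm = chainermn.create_communicator('naive')

    x = np.arange(4, dtype=np.float32) + comm.rank
    ys = comm.allgather(x)              # tuple of length comm.size
    assert np.allclose(ys[comm.rank], x)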
Code Example #3
    def send(self, data, dest, tag):
        """A primitive for inter-process transmitter.

        This method sends numpy-array to target process.
        The target process is expected to invoke ``recv()``.
        This method relies on mpi4py fast communication optimized for
        numpy arrays, which discards any information attached to
        chainer.Variable objects. Please be sure.

        Args:
            data: data to be sent (tuple, list or raw numpy/cupy array)
            dest (int): Target process specifier.
            tag (int): Message ID (MPI feature).

        """
        chainer.utils.experimental(
            'chainermn.communicators.MpiCommunicatorBase.send')

        msgtype = _MessageType(data)
        _check_dtype('send', msgtype)

        """We use ssend() instead of send() to pass unittests.
        If we don't use it, an error occurs in
        test_point_to_point_communication.py
        when using MVAPICH2-2.2 and GPUs.
        """
        self.mpi_comm.ssend(msgtype, dest=dest, tag=tag)

        # Type check.
        if not msgtype.is_tuple:
            data = [data]

        for array in data:
            if numpy.float16 == array.dtype:
                array = array.astype(numpy.float32)

            if chainer.backend.get_array_module(array) is not numpy:
                chainer.cuda.Stream.null.synchronize()
                array = (_memory_utility.get_device_memory_pointer(array),
                         _get_mpi_type(msgtype))

            else:
                array = numpy.ascontiguousarray(array)

            """We use Ssend() for the same reason as using ssend()."""
            self.mpi_comm.Ssend(array, dest=dest, tag=tag)
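
A minimal point-to-point sketch pairing ``send()`` with ``recv()`` is shown below; it assumes at least two processes and the 'naive' communicator, and that the receiving side uses the matching ``recv(source, tag)`` method of the same communicator class.

    # Hypothetical usage sketch: rank 0 sends an array to rank 1.
    # Requires at least two processes.
    import numpy as np
    import chainermn

    comm = chainermn.create_communicator('naive')

    if comm.rank == 0:
        comm.send(np.ones((3, 3), dtype=np.float32), dest=1, tag=0)
    elif comm.rank == 1:
        x = comm.recv(source=0, tag=0)  # blocks until the matching send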
Code Example #4
    def scatter(self, xs, root=0):
        """A primitive of inter-process scatter communication.

        This method tries to invoke scatter communication within the
        communicator. All processes in the communicator are expected to
        invoke ``scatter()``. This method relies on mpi4py fast communication
        optimized for numpy arrays, as well as ``send()`` and ``recv()``.

        If ``xs`` is a tuple, each element is sent to a different process.
        The length of the tuple must be the same as the communicator size.
        If ``xs`` is a ``numpy.ndarray``, it is split along the first
        axis and sent to different processes. For slave (non-root)
        processes, ``xs`` is allowed to be any value (it will be ignored).

        If ``scatter()`` is invoked with a cupy array in the root process,
        the returned array will be placed on the current device
        (``https://docs-cupy.chainer.org/en/stable/tutorial/basic.html#current-device``)
        regardless of which device the argument is placed on at remote
        nodes.

        Args:
            xs (tuple of numpy/cupy array): Arrays to be scattered.
            root (int): Rank of root process.

        Returns:
            ys (numpy/cupy array): Received array.
        """
        chainer.utils.experimental(
            'chainermn.communicators.CommunicatorBase.scatter')

        is_master = self.mpi_comm.rank == root

        if is_master:
            # Type check.
            msgtype = _MessageType(xs)
            _check_dtype('scatter', msgtype)

            if msgtype.is_tuple:
                if len(msgtype.shapes) != self.size:
                    raise ValueError(
                        'the length of xs must be consistent '
                        'with communicator size')

                xp = chainer.backend.get_array_module(*xs)
                msgtype = tuple([_MessageType(x) for x in xs])
                shapes = [mty.shapes[0] for mty in msgtype]
                # concatenate([x.reshape(-1) ... ], axis=0) will fail
                xs = xp.concatenate([x.reshape(1, -1) for x in xs], axis=1)

            else:
                assert len(msgtype.shapes) == 1

                if msgtype.shapes[0][0] != self.mpi_comm.size:
                    raise ValueError(
                        'scatter received inconsistent number of inputs '
                        'with communicator size')

                xp = chainer.backend.get_array_module(xs)
                msgtype = tuple([_MessageType(xs[0])
                                 for _ in range(self.size)])
                shapes = [xs.shape[1:] for _ in range(self.size)]

            msgtype = self.mpi_comm.scatter(msgtype, root)
            shape = msgtype.shapes[0]

            # Collective communication.
            slens = [chainer.utils.size_of_shape(s) for s in shapes]
            sbuf = _memory_utility.get_device_memory_pointer(xs)
            rbuf = xp.empty(
                [chainer.utils.size_of_shape(shape)], dtype=msgtype.dtype)
            rtype = _get_mpi_type(msgtype)
            if xp is not numpy:
                chainer.cuda.Stream.null.synchronize()

            self.mpi_comm.Scatterv(
                [sbuf, (slens, _cnt_to_dsp(slens)), _get_mpi_type(msgtype)],
                _memory_utility.array_to_buffer_object(rbuf, rtype), root)

            return rbuf.reshape(shape)

        else:  # slave processes
            msgtypes = self.mpi_comm.scatter(None, root)
            xp = msgtypes.get_array_module()
            shape = msgtypes.shapes[0]
            rbuf = xp.empty(
                [chainer.utils.size_of_shape(shape)], dtype=msgtypes.dtype)
            rtype = _get_mpi_type(msgtypes)
            self.mpi_comm.Scatterv(
                None,
                _memory_utility.array_to_buffer_object(rbuf, rtype),
                root)
            return rbuf.reshape(shape)
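
The sketch below illustrates ``scatter()`` with a single ``numpy.ndarray`` on the root, split along the first axis; as before, the communicator name is an assumption made only for illustration.

    # Hypothetical usage sketch: the root scatters one row per rank.
    import numpy as np
    import chainermn

    comm = chainermn.create_communicator('naive')

    if comm.rank == 0:
        # The first-axis length must equal the communicator size.
        xs = np.arange(comm.size * 5, dtype=np.float32).reshape(comm.size, 5)
    else:
        xs = None                       # ignored on non-root processes

    y = comm.scatter(xs, root=0)        # each rank receives a (5,) array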
Code Example #5
    def gather(self, x, root=0):
        """A primitive of inter-process gather communication.

        This method tries to invoke gather communication within the
        communicator. All processes in the communicator are expected to
        invoke ``gather()``. This method relies on mpi4py fast communication
        optimized for numpy arrays, as well as ``send()`` and ``recv()``.

        If ``x`` is a numpy array, the received data will also be allocated
        as numpy arrays. Additionally, when ``x`` is a cupy array, the
        returned arrays will be placed on the current device
        (``https://docs-cupy.chainer.org/en/stable/tutorial/basic.html#current-device``)
        regardless of which device the arguments are placed on at remote
        nodes.

        Args:
            x (numpy/cupy array): Array to be gathered.
            root (int): Rank of root process.

        Returns:
            ys (tuple of numpy/cupy array):
                Received arrays. ``None`` for non-root processes.
        """
        chainer.utils.experimental(
            'chainermn.communicators.MpiCommunicatorBase.gather')

        is_master = self.mpi_comm.rank == root

        msgtype = _MessageType(x)
        _check_dtype('gather', msgtype)

        msgtypes = self.mpi_comm.gather(msgtype, root)

        if is_master:
            _check_dtypes_are_same(msgtypes)

            for msgtype in msgtypes:
                if msgtype.is_tuple:
                    raise TypeError('gather cannot handle tuple data')

                assert len(msgtype.shapes) == 1

            xp = chainer.backend.get_array_module(x)
            sbuf = _memory_utility.array_to_buffer_object(
                x, _get_mpi_type(msgtype))
            shapes = [mty.shapes[0] for mty in msgtypes]
            rlens = [chainer.utils.size_of_shape(s) for s in shapes]
            rbuf = xp.empty([sum(rlens)], dtype=msgtype.dtype)

            if xp is not numpy:
                chainer.cuda.Stream.null.synchronize()

            self.mpi_comm.Gatherv(
                sbuf,
                [_memory_utility.get_device_memory_pointer(rbuf),
                 (rlens, _cnt_to_dsp(rlens)), _get_mpi_type(msgtype)],
                root)

            ys = [rbuf[i:i + l].reshape(s)
                  for i, l, s in zip(_cnt_to_dsp(rlens), rlens, shapes)]
            return tuple(ys)

        else:
            sbuf = _memory_utility.array_to_buffer_object(
                x, _get_mpi_type(msgtype))
            self.mpi_comm.Gatherv(sbuf, None, root)
            return None
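
Finally, a minimal sketch of ``gather()`` under the same assumptions; only the root receives the gathered tuple, while the other ranks get ``None``.

    # Hypothetical usage sketch: every rank contributes one array; only the
    # root receives the gathered tuple.
    import numpy as np
    import chainermn

    comm = chainermn.create_communicator('naive')

    x = np.full((2,), comm.rank, dtype=np.float32)
    ys = comm.gather(x, root=0)

    if comm.rank == 0:
        assert len(ys) == comm.size     # ys[r] was sent by rank r
    else:
        assert ys is None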