import unittest
from functools import reduce
from multiprocessing import Process

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
from test_collective_base import TestCollectiveRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveReduceScatter(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        nranks = 2
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = fluid.layers.collective._c_reducescatter(
                tindata, nranks)
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveReduceScatter, "reducescatter", 0)
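
For contrast with the raw-operator plumbing above, the same collective can be expressed in a few lines of dynamic-graph code. A minimal sketch, assuming two GPUs, a Paddle 2.x build whose paddle.distributed module exposes reduce_scatter, and a launch via `python -m paddle.distributed.launch --gpus 0,1 demo.py` (demo.py is a placeholder name):

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
rank = dist.get_rank()
# Each of the two ranks contributes a [10, 1000] tensor filled with its rank id.
tin = paddle.full([10, 1000], float(rank), dtype='float32')
# reduce_scatter sums the per-rank inputs and leaves each rank with one
# [5, 1000] shard of the result, mirroring _c_reducescatter with nranks=2.
tout = paddle.empty([5, 1000], dtype='float32')
dist.reduce_scatter(tout, paddle.split(tin, 2, axis=0))
print(float(tout[0][0]))  # 1.0 on both ranks: 0 (rank 0) + 1 (rank 1)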
Example 2

class TestCollectiveSendRecvDynamicShape(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = self.global_ring_id
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float64',
                                  append_batch_size=False)
            if self.rank == 0:
                main_prog.global_block().append_op(type="send_v2",
                                                   inputs={'X': tindata},
                                                   attrs={
                                                       'ring_id': ring_id,
                                                       'peer': 1,
                                                       'use_calc_stream': True,
                                                       'dynamic_shape': True
                                                   })
            else:
                main_prog.global_block().append_op(type="recv_v2",
                                                   outputs={'Out': tindata},
                                                   attrs={
                                                       'peer': 0,
                                                       'ring_id': ring_id,
                                                       'dtype': tindata.dtype,
                                                       'out_shape':
                                                       tindata.shape,
                                                       'use_calc_stream': True,
                                                       'dynamic_shape': True
                                                   })
            return tindata


if __name__ == "__main__":
    runtime_main(TestCollectiveSendRecvDynamicShape, "sendrecv_dynamic_shape",
                 0)
Example 3
class TestCollectiveConcat(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        nranks = 2
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofconcat",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(type="c_concat",
                                               inputs={'X': tindata},
                                               attrs={
                                                   'ring_id': ring_id,
                                                   'rank': self.rank,
                                                   'nranks': nranks
                                               },
                                               outputs={'Out': toutdata})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveConcat, "concat", 0)
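
c_concat gathers each rank's shard and joins the shards along the last axis, which is how model-parallel layers reassemble a column-sharded output. A rough dynamic-graph equivalent, under the same two-rank launch assumed above, is all_gather followed by concat:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
shard = paddle.full([10, 1000], float(dist.get_rank()), dtype='float32')
parts = []
dist.all_gather(parts, shard)        # parts[i] holds rank i's shard
out = paddle.concat(parts, axis=-1)  # [10, 2000], matching c_concat's layout
print(out.shape)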
Example 4

class TestCollectiveSendRecv(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = self.global_ring_id
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(
                name="tindata", shape=[10, 1000], dtype='float64')
            if self.rank == 0:
                main_prog.global_block().append_op(
                    type="send_v2",
                    inputs={'X': tindata},
                    attrs={
                        'ring_id': ring_id,
                        'peer': 1,
                        'use_calc_stream': True
                    })
            else:
                main_prog.global_block().append_op(
                    type="recv_v2",
                    outputs={'Out': tindata},
                    attrs={
                        'peer': 0,
                        'ring_id': ring_id,
                        'dtype': tindata.dtype,
                        'out_shape': tindata.shape,
                        'use_calc_stream': True,
                    })
            return tindata


if __name__ == "__main__":
    runtime_main(TestCollectiveSendRecv, "sendrecv", 0)
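
The dynamic-graph counterpart of this point-to-point pair is paddle.distributed.send/recv; a sketch assuming the same two-rank launch and a Paddle release that provides these calls:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
data = paddle.full([10, 1000], float(dist.get_rank()), dtype='float64')
if dist.get_rank() == 0:
    dist.send(data, dst=1)  # rank 0 ships its tensor to rank 1
else:
    dist.recv(data, src=0)  # rank 1 overwrites data in place
print(float(data[0][0]))    # 0.0 on both ranks after the exchange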
Example 5

class TestCollectiveReduce(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        rootid = 1
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofreduce",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(type="c_reduce_sum",
                                               inputs={'X': tindata},
                                               attrs={
                                                   'ring_id': ring_id,
                                                   'use_calc_stream': True,
                                                   'root_id': rootid
                                               },
                                               outputs={'Out': toutdata})
            main_prog.global_block().append_op(type="c_sync_comm_stream",
                                               inputs={'X': toutdata},
                                               outputs={'Out': toutdata},
                                               attrs={'ring_id': ring_id})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveReduce, "reduce", 0)
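
The equivalent high-level call is paddle.distributed.reduce, which sums (by default) the per-rank tensors into the destination rank only; a sketch under the same two-rank assumptions:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
t = paddle.full([10, 1000], float(dist.get_rank() + 1), dtype='float32')
dist.reduce(t, dst=1)  # like c_reduce_sum with root_id=1
# Only rank 1 is guaranteed to hold the reduced value (1 + 2 = 3).
print(float(t[0][0]))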
Example 6

class TestCollectiveAllGather(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        nranks = 2
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofgather",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(type="c_allgather",
                                               inputs={'X': tindata},
                                               attrs={
                                                   'ring_id': ring_id,
                                                   'nranks': nranks
                                               },
                                               outputs={'Out': toutdata})
            main_prog.global_block().append_op(type="c_sync_comm_stream",
                                               inputs={'X': toutdata},
                                               outputs={'Out': toutdata},
                                               attrs={'ring_id': ring_id})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveAllGather, "allgather", 0)
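
paddle.distributed.all_gather is the dynamic-graph form of c_allgather; c_allgather stacks the per-rank tensors along dim 0, so the two [10, 1000] inputs become a [20, 1000] result on every rank:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
t = paddle.full([10, 1000], float(dist.get_rank()), dtype='float32')
gathered = []
dist.all_gather(gathered, t)
out = paddle.concat(gathered, axis=0)  # [20, 1000] on every rank
print(out.shape)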
Example 7
class TestCollectiveSplit(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        nranks = 2
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofsplit",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(type="c_split",
                                               inputs={'X': tindata},
                                               attrs={
                                                   'ring_id': ring_id,
                                                   'rank': self.rank,
                                                   'nranks': nranks
                                               },
                                               outputs={'Out': toutdata})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveSplit, "split", 0)
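
c_split is the local inverse of c_concat: each rank keeps only its own slice of the input along the last axis, with no communication involved. The same effect in dynamic graph is an indexed paddle.split, sketched here for two ranks:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
full = paddle.ones([10, 1000], dtype='float32')
# Keep this rank's [10, 500] slice, mirroring c_split with nranks=2.
mine = paddle.split(full, 2, axis=-1)[dist.get_rank()]
print(mine.shape)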
Example 8

class TestCollectiveScatter(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        rootid = 1
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofreduce",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(type="c_scatter",
                                               inputs={'X': tindata},
                                               attrs={
                                                   'ring_id': ring_id,
                                                   'root': rootid,
                                                   'nranks': 2
                                               },
                                               outputs={'Out': toutdata})
            main_prog.global_block().append_op(type="c_sync_comm_stream",
                                               inputs={'X': toutdata},
                                               outputs={'Out': toutdata},
                                               attrs={'ring_id': ring_id})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveScatter, "scatter", 0)
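
paddle.distributed.scatter covers the same ground: the root rank supplies a list of shards and every rank receives one. A sketch under the two-rank assumptions above, with root 1 to match the test:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
if dist.get_rank() == 1:  # root, matching rootid = 1 above
    parts = list(paddle.split(paddle.ones([10, 1000]), 2, axis=0))
else:
    parts = None
out = paddle.empty([5, 1000], dtype='float32')
dist.scatter(out, parts, src=1)
print(out.shape)  # every rank holds one [5, 1000] shard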
Example 9

class TestCollectiveBroadcast(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        rootid = 1
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofbroadcast",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(type="c_broadcast",
                                               inputs={'X': tindata},
                                               attrs={
                                                   'ring_id': ring_id,
                                                   'root': rootid
                                               },
                                               outputs={'Out': toutdata})
            main_prog.global_block().append_op(type="c_sync_comm_stream",
                                               inputs={'X': toutdata},
                                               outputs={'Out': toutdata},
                                               attrs={'ring_id': ring_id})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveBroadcast, "broadcast", 0)
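
The one-line dynamic-graph equivalent is paddle.distributed.broadcast with src set to the root, assuming the same two-rank launch:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
t = paddle.full([10, 1000], float(dist.get_rank()), dtype='float32')
dist.broadcast(t, src=1)  # everyone now holds rank 1's tensor
print(float(t[0][0]))     # 1.0 on both ranks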
            main_prog.global_block().append_op(type="c_wait_comm",
                                               inputs={'X': toutdata},
                                               outputs={'Out': toutdata},
                                               attrs={'ring_id': ring_id})

            # tout = tin + tout - tin = tout
            if True:
                main_prog.global_block().append_op(
                    type="elementwise_add",
                    inputs={
                        'X': tindata,
                        'Y': toutdata,
                    },
                    outputs={'Out': toutdata},
                )
                main_prog.global_block().append_op(
                    type="elementwise_sub",
                    inputs={
                        'X': toutdata,
                        'Y': tindata,
                    },
                    outputs={'Out': toutdata},
                )

            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveAllreduce, "allreduce", 0)
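
Finally, the stream juggling above collapses to a single call in dynamic graph, where paddle.distributed.all_reduce synchronizes for you; a sketch under the same two-rank launch:

import paddle
import paddle.distributed as dist

dist.init_parallel_env()
t = paddle.full([10, 1000], float(dist.get_rank() + 1), dtype='float32')
dist.all_reduce(t)     # defaults to SUM: 1 + 2 = 3 on every rank
print(float(t[0][0]))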