Example #1
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tensor_list = []
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            paddle.distributed.all_gather(tensor_list, tindata)
            return tensor_list


if __name__ == "__main__":
    runtime_main(TestCollectiveAllgatherAPI, "allgather")
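For reference, the same all_gather collective can be exercised in dynamic-graph mode without the static-graph test harness. The sketch below is illustrative only; it assumes two processes started with python -m paddle.distributed.launch, and the variable names are hypothetical.

# A minimal dynamic-graph sketch of the same all_gather call (assumption:
# two processes launched via `python -m paddle.distributed.launch --gpus 0,1`).
import paddle
import paddle.distributed as dist

dist.init_parallel_env()                    # join the default process group
tensor_list = []
tindata = paddle.rand([10, 1000], dtype='float32')
dist.all_gather(tensor_list, tindata)       # tensor_list gets one tensor per rank
print(dist.get_rank(), len(tensor_list), tensor_list[0].shape)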
Example #2
            seed = os.getpid()
            np.random.seed(seed)
            in_feat = 2
            n_expert = 2
            world_size = 2
            tot_expert = n_expert * world_size
            local_expert_count = np.random.randint(
                1, 4, size=tot_expert).astype("int")
            fwd_expert_count = sum(local_expert_count)
            local_input_buf = np.random.rand(fwd_expert_count,
                                             in_feat).astype("float32")
            local_expert_count = paddle.to_tensor(local_expert_count)
            local_input_buf = paddle.to_tensor(local_input_buf)
            global_expert_count = []
            paddle.distributed.alltoall(
                paddle.split(
                    local_expert_count, 2, axis=0),
                global_expert_count)
            global_expert_count = paddle.concat(global_expert_count, axis=0)
            local_input_buf.stop_gradient = False
            output = paddle.distributed.utils.global_scatter(
                local_input_buf, local_expert_count, global_expert_count)
            output.stop_gradient = False
            c = output * output
            c.backward()
            return [output.numpy(), local_input_buf.grad.numpy()]


if __name__ == "__main__":
    runtime_main(TestCollectiveGlobalScatterAPI, "global_scatter")
Example #3
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveAllToAllAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            tindata = paddle.split(tindata, 2, axis=0)
            tout_data = []
            paddle.distributed.alltoall(tindata, tout_data)
            return tout_data


if __name__ == "__main__":
    runtime_main(TestCollectiveAllToAllAPI, "alltoall")
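For comparison, here is a minimal dynamic-graph sketch of the same alltoall exchange between two ranks. It is an illustration under the assumption of a two-process paddle.distributed.launch run, not part of the test above.

# Hedged sketch: each of the two ranks contributes two slices and receives
# one slice from every rank; out_list is filled in place by alltoall.
import paddle
import paddle.distributed as dist

dist.init_parallel_env()
rank = dist.get_rank()
in_list = [paddle.full([2, 3], float(rank)),
           paddle.full([2, 3], float(rank) + 10)]
out_list = []
dist.alltoall(in_list, out_list)
print(rank, [t.numpy() for t in out_list])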
Example #4
import paddle
import paddle.fluid as fluid
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveScatterAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float64',
                                  append_batch_size=False)
            toutdata = layers.fill_constant(shape=[5, 1000],
                                            dtype='float64',
                                            value=1.0)
            tensor_list = None
            if rank == 1:
                tensor_list = paddle.split(tindata, 2, axis=0)
            paddle.distributed.scatter(toutdata, tensor_list, src=1)
            return [toutdata]


if __name__ == "__main__":
    runtime_main(TestCollectiveScatterAPI, "scatter")
Example #5
            fleet.init(is_collective=True)
            np.random.seed(2020)
            np_array = np.random.rand(1000, 16)

            data = paddle.static.data(name='tindata',
                                      shape=[10, 1000],
                                      dtype="float32")
            paddle.distributed.broadcast(data, src=0)
            data = paddle.split(data, 2, axis=1)[rank]
            if rank == 0:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[0:500, :]), )
            else:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[500:1000, :]), )

            linear_out = paddle.distributed.split(
                data,
                size=(1000, 16),
                operation='linear',
                axis=0,
                num_partitions=2,
                weight_attr=param_attr,
                bias_attr=True,
            )

            return [linear_out]


if __name__ == "__main__":
    runtime_main(TestRowParallelLinearAPI, "row_parallel_linear")
Example #6
            fleet.init(is_collective=True)
            np.random.seed(2020)
            np_array = np.random.rand(1000, 16)

            data = paddle.static.data(
                name='tindata', shape=[10, 1000], dtype="float32")
            paddle.distributed.broadcast(data, src=0)
            if rank == 0:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[:, 0:8]), )
            else:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[:, 8:16]), )

            linear_out = paddle.distributed.split(
                data,
                size=(1000, 16),
                operation='linear',
                axis=1,
                num_partitions=2,
                weight_attr=param_attr,
                bias_attr=True, )

            return [linear_out]


if __name__ == "__main__":
    runtime_main(TestColumnParallelLinearAPI, "column_parallel_linear")
Example #7
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveSendRecvAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(
                name="tindata",
                shape=[10, 1000],
                dtype='float32',
                append_batch_size=False)
            if rank == 0:
                paddle.distributed.send(tindata, dst=1)
            else:
                paddle.distributed.recv(tindata, src=0)
            return [tindata]


if __name__ == "__main__":
    runtime_main(TestCollectiveSendRecvAPI, "sendrecv")
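The same point-to-point pattern can be sketched in dynamic-graph mode; the snippet below is an assumption-laden illustration (two processes via paddle.distributed.launch), not taken from the test file.

# Hedged sketch: rank 0 sends a tensor, rank 1 receives it in place.
import paddle
import paddle.distributed as dist

dist.init_parallel_env()
if dist.get_rank() == 0:
    data = paddle.to_tensor([7, 8, 9], dtype='int64')
    dist.send(data, dst=1)
else:
    data = paddle.zeros([3], dtype='int64')
    dist.recv(data, src=0)
print(dist.get_rank(), data.numpy())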
Example #8
            # (num_embeddings, embedding_dim) = (12, 8)
            size = (12, 8)
            np_array = np.random.rand(size[0], size[1])
            paddle.seed(2020)
            data_in = paddle.randint(0, size[0], shape=(10, 4))

            data = paddle.static.data(
                name='tindata', shape=[10, 1000], dtype="float32")
            per_part_size = size[0] // 2
            if rank == 0:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[0:per_part_size, :]), )
            else:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[per_part_size:size[0], :]), )

            emb_out = paddle.distributed.split(
                data_in,
                size,
                operation="embedding",
                num_partitions=2,
                weight_attr=param_attr)

            return [data_in, emb_out]


if __name__ == "__main__":
    runtime_main(TestParallelEmbeddingAPI, "parallel_embedding")
Example #9
    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            fleet.init(is_collective=True)
            np.random.seed(2020)
            np_array = np.random.rand(9, 8)
            paddle.seed(2020)
            data_in = paddle.randint(0, 7, shape=(10, 4))

            data = paddle.static.data(name='tindata',
                                      shape=[10, 1000],
                                      dtype="float32")
            if rank == 0:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[0:5, :]), )
            else:
                param_attr = paddle.fluid.ParamAttr(
                    initializer=paddle.fluid.initializer.NumpyArrayInitializer(
                        np_array[5:9, :]), )

            emb_out = paddle.distributed.split(data_in, (7, 8),
                                               operation="embedding",
                                               num_partitions=2,
                                               weight_attr=param_attr)

            return [data_in, emb_out]


if __name__ == "__main__":
    runtime_main(TestParallelEmbeddingAPINoneDivisible, "parallel_embedding")
Example #10
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveBarrierAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            paddle.distributed.barrier()
            return []


if __name__ == "__main__":
    runtime_main(TestCollectiveBarrierAPI, "barrier")
Example #11
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveAllreduceNewGroupAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            gp = paddle.distributed.new_group([0, 1])
            paddle.distributed.all_reduce(tindata,
                                          group=gp,
                                          use_calc_stream=False)
            return [tindata]


if __name__ == "__main__":
    runtime_main(TestCollectiveAllreduceNewGroupAPI, "allreduce")
Example #12
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            paddle.distributed.all_reduce(tindata)
            return [tindata]


if __name__ == "__main__":
    runtime_main(TestCollectiveAllreduceAPI, "allreduce")
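As a quick dynamic-graph counterpart (illustrative sketch only, assuming a two-process launch), the same all_reduce defaults to a SUM across ranks:

# Hedged sketch: every rank contributes its tensor; after all_reduce each
# rank holds the elementwise sum.
import paddle
import paddle.distributed as dist

dist.init_parallel_env()
data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]]) * (dist.get_rank() + 1)
dist.all_reduce(data)
print(dist.get_rank(), data.numpy())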
Example #13
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()

class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            paddle.distributed.reduce(tindata, dst=0)
            return [tindata]


if __name__ == "__main__":
    runtime_main(TestCollectiveReduceAPI, "reduce")
Example #14
            seed = os.getpid()
            np.random.seed(seed)
            in_feat = 2
            n_expert = 2
            world_size = 2
            tot_expert = n_expert * world_size
            local_expert_count = np.random.randint(
                1, 4, size=tot_expert).astype("int")
            local_expert_count = paddle.to_tensor(local_expert_count)
            global_expert_count = []
            paddle.distributed.alltoall(
                paddle.split(local_expert_count, 2, axis=0),
                global_expert_count)
            global_expert_count = paddle.concat(global_expert_count, axis=0)
            fwd_expert_count = sum(global_expert_count)
            np.random.seed(seed)
            local_input_buf = np.random.rand(fwd_expert_count,
                                             in_feat).astype("float32")
            local_input_buf = paddle.to_tensor(local_input_buf)
            local_input_buf.stop_gradient = False
            output = paddle.distributed.utils.global_gather(
                local_input_buf, local_expert_count, global_expert_count)
            output.stop_gradient = False
            c = output * output
            c.stop_gradient = False
            c.backward()
            return [output.numpy(), local_input_buf.grad.numpy()]


if __name__ == "__main__":
    runtime_main(TestCollectiveGlobalGatherAPI, "global_gather")
Example #15
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

paddle.enable_static()


class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(name="tindata",
                                  shape=[10, 1000],
                                  dtype='float32')
            paddle.distributed.broadcast(tindata, src=1)
            return [tindata]


if __name__ == "__main__":
    runtime_main(TestCollectiveBroadcastAPI, "broadcast")
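For completeness, a dynamic-graph sketch of the same broadcast from rank 1 (an illustration under the usual two-process launch assumption):

# Hedged sketch: rank 1 owns the data; after broadcast every rank has it.
import paddle
import paddle.distributed as dist

dist.init_parallel_env()
if dist.get_rank() == 1:
    data = paddle.to_tensor([[4.0, 5.0, 6.0]])
else:
    data = paddle.zeros([1, 3], dtype='float32')
dist.broadcast(data, src=1)
print(dist.get_rank(), data.numpy())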