Example #1
0
 def collective_fn(input_tensor, output_tensor, context):
     """Allreduce ``input_tensor`` into ``output_tensor`` over ``context``.

     The element count and gloo dtype are derived from ``input_tensor``;
     the reduce op is translated from ``allreduce_options.reduceOp``, a
     name defined outside this function.
     """
     send_ptr = gloo_util.get_tensor_ptr(input_tensor)
     recv_ptr = gloo_util.get_tensor_ptr(output_tensor)
     n_elements = gloo_util.get_tensor_n_elements(input_tensor)
     gloo_dtype = gloo_util.get_gloo_tensor_dtype(input_tensor)
     reduce_op = gloo_util.get_gloo_reduce_op(allreduce_options.reduceOp)
     pygloo.allreduce(context, send_ptr, recv_ptr, n_elements, gloo_dtype,
                      reduce_op)
Example #2
0
def test_redis(rank, world_size, redis_ip_address, redis_port, redis_password):
    """Rendezvous through a Redis store and run one ring allreduce (SUM).

    Args:
        rank: Rank of this process within list of participating processes.
        world_size: Number of participating processes.
        redis_ip_address: Address handed to ``pygloo.rendezvous.RedisStore``.
        redis_port: Port handed to ``pygloo.rendezvous.RedisStore``.
        redis_password: Password handed to ``RedisStore.authorize``.
    """
    context = pygloo.rendezvous.Context(rank, world_size)

    # TCP device created from the "localhost" transport attribute.
    tcp_device = pygloo.transport.tcp.CreateDevice(
        pygloo.transport.tcp.attr("localhost"))

    redis_store = pygloo.rendezvous.RedisStore(redis_ip_address, redis_port)
    redis_store.authorize(redis_password)
    # Wrap the Redis store in a PrefixStore using the prefix "default".
    prefixed_store = pygloo.rendezvous.PrefixStore("default", redis_store)

    # Perform rendezvous for TCP pairs.
    context.connectFullMesh(prefixed_store, tcp_device)

    print("Using RedisStore rendezvous, connect successful!!")

    sendbuf = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.float32)
    recvbuf = np.zeros_like(sendbuf, dtype=np.float32)
    send_address = sendbuf.ctypes.data
    recv_address = recvbuf.ctypes.data

    # Element count; the fallback covers buffers exposing ``.numpy()``.
    if isinstance(sendbuf, np.ndarray):
        element_count = sendbuf.size
    else:
        element_count = sendbuf.numpy().size

    pygloo.allreduce(
        context,
        send_address,
        recv_address,
        element_count,
        pygloo.glooDataType_t.glooFloat32,
        pygloo.ReduceOp.SUM,
        pygloo.allreduceAlgorithm.RING,
    )

    print(f"rank {rank} sends {sendbuf}, receives {recvbuf}")
Example #3
0
def test_multiGroup(rank, world_size, redis_ip_address, redis_port,
                    redis_password):
    """Test multiple groups that rendezvous on Redis without a PrefixStore.

    Three contexts are connected one after another against a bare
    ``RedisStore``; afterwards a ring allreduce (SUM) is run on each.

    Args:
        rank: Rank of this process within list of participating processes.
        world_size: Number of participating processes.
        redis_ip_address: Address handed to ``pygloo.rendezvous.RedisStore``.
        redis_port: Port handed to ``pygloo.rendezvous.RedisStore``.
        redis_password: Password handed to ``RedisStore.authorize``.
    """
    group_names = [f"multiGroup{i}" for i in range(3)]
    contexts = {}
    for name in group_names:
        group_context = pygloo.rendezvous.Context(rank, world_size)

        # TCP device created from the "localhost" transport attribute.
        tcp_device = pygloo.transport.tcp.CreateDevice(
            pygloo.transport.tcp.attr("localhost"))

        redis_store = pygloo.rendezvous.RedisStore(
            redis_ip_address, redis_port)
        redis_store.authorize(redis_password)

        # Perform rendezvous for TCP pairs, directly on the Redis store.
        group_context.connectFullMesh(redis_store, tcp_device)

        if rank == 0:
            # NOTE(review): presumably clears this group's rendezvous
            # entries so the next un-prefixed group starts clean -- confirm,
            # and confirm that other ranks cannot still need these keys.
            keys_to_delete = [f"rank_{i}" for i in range(world_size)]
            keys_to_delete.extend(f"{i}" for i in range(world_size))
            redis_store.delKeys(keys_to_delete)

        contexts[name] = group_context
    print("Using RedisStore rendezvous, connect successful!!")

    for name in group_names:
        group_context = contexts[name]

        sendbuf = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.float32)
        recvbuf = np.zeros_like(sendbuf, dtype=np.float32)
        send_address = sendbuf.ctypes.data
        recv_address = recvbuf.ctypes.data

        # Element count; the fallback covers buffers exposing ``.numpy()``.
        if isinstance(sendbuf, np.ndarray):
            element_count = sendbuf.size
        else:
            element_count = sendbuf.numpy().size

        pygloo.allreduce(
            group_context,
            send_address,
            recv_address,
            element_count,
            pygloo.glooDataType_t.glooFloat32,
            pygloo.ReduceOp.SUM,
            pygloo.allreduceAlgorithm.RING,
        )

        print(f"rank {rank} sends {sendbuf}, receives {recvbuf}")
Example #4
0
def test_barrier(rank, world_size, fileStore_path):
    """Rendezvous through a file store, allreduce once, then hit a barrier.

    Args:
        rank: Rank of this process within list of participating processes.
        world_size: Number of participating processes.
        fileStore_path: Directory handed to ``pygloo.rendezvous.FileStore``;
            rank 0 removes it if present and recreates it first.
    """
    if rank != 0:
        # Non-zero ranks pause while rank 0 (re)creates the directory.
        time.sleep(0.5)
    else:
        if os.path.exists(fileStore_path):
            shutil.rmtree(fileStore_path)
        os.makedirs(fileStore_path)

    context = pygloo.rendezvous.Context(rank, world_size)

    # TCP device created from the "localhost" transport attribute.
    tcp_device = pygloo.transport.tcp.CreateDevice(
        pygloo.transport.tcp.attr("localhost"))

    file_store = pygloo.rendezvous.FileStore(fileStore_path)
    # Wrap the file store in a PrefixStore keyed by the world size.
    prefixed_store = pygloo.rendezvous.PrefixStore(str(world_size), file_store)

    # Perform rendezvous for TCP pairs.
    context.connectFullMesh(prefixed_store, tcp_device)

    sendbuf = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.float32)
    recvbuf = np.zeros_like(sendbuf, dtype=np.float32)
    send_address = sendbuf.ctypes.data
    recv_address = recvbuf.ctypes.data

    # Alternative buffers using torch tensors (kept for reference):
    # sendbuf = torch.Tensor([[1,2,3],[1,2,3]]).float()
    # recvbuf = torch.zeros_like(sendbuf)
    # send_address = sendbuf.data_ptr()
    # recv_address = recvbuf.data_ptr()

    # Element count; the fallback covers buffers exposing ``.numpy()``.
    if isinstance(sendbuf, np.ndarray):
        element_count = sendbuf.size
    else:
        element_count = sendbuf.numpy().size

    pygloo.allreduce(
        context,
        send_address,
        recv_address,
        element_count,
        pygloo.glooDataType_t.glooFloat32,
        pygloo.ReduceOp.SUM,
        pygloo.allreduceAlgorithm.RING,
    )
    pygloo.barrier(context)
    print(f"rank {rank} sends {sendbuf}, receives {recvbuf}")