Code example #1
File: collective.py Project: yuan776/ray
def recv_multigpu(tensor,
                  src_rank: int,
                  src_gpu_index: int,
                  group_name: str = "default"):
    """Receive a tensor from a remote GPU synchronously.

    The function assumes each process owns more than one GPU, and that
    the sender and receiver processes have an equal number of GPUs.

    Args:
        tensor: the received tensor, located on a GPU.
        src_rank (int): the rank of the source process.
        src_gpu_index (int): the index of the source gpu on the src process.
        group_name (str): the name of the collective group.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("recv_multigpu call requires NCCL.")
    _check_single_tensor_input(tensor)
    g = _check_and_get_group(group_name)
    _check_rank_valid(g, src_rank)
    if src_rank == g.rank:
        raise RuntimeError("The dst_rank '{}' is self. Considering "
                           "doing GPU to GPU memcpy instead?".format(src_rank))
    opts = types.RecvOptions()
    opts.src_rank = src_rank
    opts.src_gpu_index = src_gpu_index
    g.recv([tensor], opts)
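A minimal receiver-side sketch (not part of the project): it assumes the module is importable as ray.util.collective, that a two-process NCCL group named "send_recv" has already been initialized via init_collective_group, and that this code runs on rank 1. The matching sender side appears in code example #3.

import cupy as cp
import ray.util.collective as col

# Pre-allocate the destination buffer on this process's GPU 0;
# recv_multigpu fills it in place.
with cp.cuda.Device(0):
    buf = cp.zeros((4, 4), dtype=cp.float32)

# Block until the tensor on GPU 0 of rank 0 has arrived in `buf`.
col.recv_multigpu(buf, src_rank=0, src_gpu_index=0, group_name="send_recv")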
Code example #2
File: collective.py Project: yuan776/ray
def reducescatter_multigpu(output_tensor_list,
                           input_tensor_lists,
                           group_name: str = "default",
                           op=types.ReduceOp.SUM):
    """Reducescatter a list of tensors across all GPUs.

    Args:
        output_tensor_list: the resulting list of tensors, with
            shape num_gpus * shape(tensor).
        input_tensor_lists: the original tensors, with shape:
            num_gpus * world_size * shape(tensor).
        group_name (str): the name of the collective group.
        op: The reduce operation.

    Returns:
        None.
    """
    if not types.cupy_available():
        raise RuntimeError("Multigpu calls requires NCCL and Cupy.")
    _check_tensor_lists_input(input_tensor_lists)
    _check_tensor_list_input(output_tensor_list)
    g = _check_and_get_group(group_name)
    opts = types.ReduceScatterOptions()
    opts.reduceOp = op
    g.reducescatter(output_tensor_list, input_tensor_lists, opts)
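A minimal sketch (assumptions: ray.util.collective as the import path, a pre-initialized two-process NCCL group, and two GPUs per process):

import cupy as cp
import ray.util.collective as col

num_gpus, world_size = 2, 2

# Outputs: one tensor per local GPU (num_gpus * shape(tensor)).
# Inputs: world_size tensors per local GPU
#         (num_gpus * world_size * shape(tensor)).
output_tensor_list, input_tensor_lists = [], []
for gpu in range(num_gpus):
    with cp.cuda.Device(gpu):
        output_tensor_list.append(cp.zeros((8,), dtype=cp.float32))
        input_tensor_lists.append(
            [cp.ones((8,), dtype=cp.float32) for _ in range(world_size)])

# Each local GPU receives the reduced (summed) tensor assigned to it.
col.reducescatter_multigpu(output_tensor_list, input_tensor_lists)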
Code example #3
File: collective.py Project: yuan776/ray
def send_multigpu(tensor,
                  dst_rank: int,
                  dst_gpu_index: int,
                  group_name: str = "default"):
    """Send a tensor to a remote GPU synchronously.

    The function assumes each process owns more than one GPU, and that
    the sender and receiver processes have an equal number of GPUs.

    Args:
        tensor: the tensor to send, located on a GPU.
        dst_rank (int): the rank of the destination process.
        dst_gpu_index (int): the destination gpu index.
        group_name (str): the name of the collective group.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("send_multigpu call requires NCCL.")
    _check_single_tensor_input(tensor)
    g = _check_and_get_group(group_name)
    _check_rank_valid(g, dst_rank)
    if dst_rank == g.rank:
        raise RuntimeError("The dst_rank '{}' is self. Considering "
                           "doing GPU to GPU memcpy instead?".format(dst_rank))
    opts = types.SendOptions()
    opts.dst_rank = dst_rank
    opts.dst_gpu_index = dst_gpu_index
    g.send([tensor], opts)
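The sender-side counterpart of code example #1, under the same assumptions (group "send_recv" already initialized, this code running on rank 0):

import cupy as cp
import ray.util.collective as col

# Create the payload on this process's GPU 0.
with cp.cuda.Device(0):
    payload = cp.arange(16, dtype=cp.float32).reshape(4, 4)

# Blocks until the tensor has been sent to GPU 0 of rank 1, which must
# post a matching recv_multigpu call.
col.send_multigpu(payload, dst_rank=1, dst_gpu_index=0,
                  group_name="send_recv")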
Code example #4
File: collective.py Project: yuan776/ray
def broadcast_multigpu(tensor_list,
                       src_rank: int = 0,
                       src_tensor: int = 0,
                       group_name: str = "default"):
    """Broadcast the tensor from a source GPU to all other GPUs.

    Args:
        tensor_list: the tensors to broadcast (src) or receive (dst).
        src_rank (int): the rank of the source process.
        src_tensor (int): the index of the source GPU on the source process.
        group_name (str): the collective group name to perform broadcast.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("Multigpu calls requires NCCL and Cupy.")
    _check_tensor_list_input(tensor_list)
    g = _check_and_get_group(group_name)

    # check src rank
    _check_rank_valid(g, src_rank)
    _check_root_tensor_valid(len(tensor_list), src_tensor)
    opts = types.BroadcastOptions()
    opts.root_rank = src_rank
    opts.root_tensor = src_tensor
    g.broadcast(tensor_list, opts)
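A minimal sketch of broadcasting from GPU 0 of rank 0 to every other GPU in the group (assumes a pre-initialized NCCL group and two GPUs per process):

import cupy as cp
import ray.util.collective as col

num_gpus = 2

# One tensor per local GPU. On the source rank, tensor_list[src_tensor]
# holds the data to broadcast; every other tensor is overwritten with it.
tensor_list = []
for gpu in range(num_gpus):
    with cp.cuda.Device(gpu):
        tensor_list.append(cp.full((8,), float(gpu), dtype=cp.float32))

col.broadcast_multigpu(tensor_list, src_rank=0, src_tensor=0)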
Code example #5
File: collective.py Project: yuan776/ray
def reduce_multigpu(tensor_list: list,
                    dst_rank: int = 0,
                    dst_tensor: int = 0,
                    group_name: str = "default",
                    op=types.ReduceOp.SUM):
    """Reduce the tensor across the group to the destination rank
    and destination tensor.

    Args:
        tensor_list: the list of tensors to be reduced on this process;
            each tensor located on a GPU.
        dst_rank (int): the rank of the destination process.
        dst_tensor (int): the index of the GPU at the destination process.
        group_name (str): the collective group name to perform reduce.
        op: The reduce operation.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("Multigpu calls requires NCCL and Cupy.")
    _check_tensor_list_input(tensor_list)
    g = _check_and_get_group(group_name)

    # check dst rank
    _check_rank_valid(g, dst_rank)
    _check_root_tensor_valid(len(tensor_list), dst_tensor)
    opts = types.ReduceOptions()
    opts.reduceOp = op
    opts.root_rank = dst_rank
    opts.root_tensor = dst_tensor
    g.reduce(tensor_list, opts)
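A minimal sketch that sums one tensor per GPU into GPU 1 of rank 0 (same assumptions as above: pre-initialized group, two GPUs per process):

import cupy as cp
import ray.util.collective as col

num_gpus = 2

# One tensor per local GPU; every GPU in the group contributes.
tensor_list = []
for gpu in range(num_gpus):
    with cp.cuda.Device(gpu):
        tensor_list.append(cp.ones((8,), dtype=cp.float32))

# After the call, the summed result lives on GPU 1 of rank 0; tensors on
# other ranks/GPUs keep their original values.
col.reduce_multigpu(tensor_list, dst_rank=0, dst_tensor=1)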
Code example #6
File: collective.py Project: yuan776/ray
def _check_single_tensor_input(tensor):
    """Check if the tensor is with a supported type."""
    if isinstance(tensor, np.ndarray):
        return
    if types.cupy_available():
        if isinstance(tensor, types.cp.ndarray):
            return
    if types.torch_available():
        if isinstance(tensor, types.th.Tensor):
            return
    raise RuntimeError("Unrecognized tensor type '{}'. Supported types are: "
                       "np.ndarray, torch.Tensor, cupy.ndarray.".format(
                           type(tensor)))
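A quick illustration of how the check behaves (hypothetical inputs; assumes it is called from within collective.py, where np and types are in scope):

import numpy as np

# NumPy arrays always pass; cupy and torch tensors pass only when their
# libraries are importable; anything else raises RuntimeError.
_check_single_tensor_input(np.zeros((2, 2)))     # returns None

try:
    _check_single_tensor_input([1.0, 2.0, 3.0])  # a plain list is rejected
except RuntimeError as err:
    print(err)  # "Unrecognized tensor type '<class 'list'>' ..."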
Code example #7
File: collective.py Project: stjordanis/ray
def synchronize(gpu_id: int):
    """Synchronize the current process to a give device.

    Args:
        gpu_id (int): the GPU device id to synchronize.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("synchronize call requires CUDA and NCCL.")
    import cupy as cp
    cp.cuda.Device(gpu_id).synchronize()
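A minimal sketch (assumes the function is exposed as ray.util.collective.synchronize and that GPU 0 is visible to this process):

import cupy as cp
import ray.util.collective as col

# Launch asynchronous work on GPU 0 ...
with cp.cuda.Device(0):
    a = cp.ones((1024, 1024), dtype=cp.float32)
    result = a @ a

# ... then block the host until GPU 0 has finished, so `result` is safe
# to read back or hand to a collective call.
col.synchronize(0)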
Code example #8
File: collective.py Project: yuan776/ray
def allreduce_multigpu(tensor_list: list,
                       group_name: str = "default",
                       op=types.ReduceOp.SUM):
    """Collective allreduce a list of tensors across the group.

    Args:
        tensor_list (List[tensor]): list of tensors to be allreduced,
            each on a GPU.
        group_name (str): the collective group name to perform allreduce.
        op: The reduce operation.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("Multigpu calls requires NCCL and Cupy.")
    _check_tensor_list_input(tensor_list)
    g = _check_and_get_group(group_name)
    opts = types.AllReduceOptions()
    opts.reduceOp = op
    g.allreduce(tensor_list, opts)
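A minimal sketch (assumes ray.util.collective as the import path, a pre-initialized NCCL group named "default", and two GPUs per process):

import cupy as cp
import ray.util.collective as col

num_gpus = 2

# One tensor per local GPU; every GPU of every process contributes.
tensor_list = []
for gpu in range(num_gpus):
    with cp.cuda.Device(gpu):
        tensor_list.append(cp.ones((8,), dtype=cp.float32))

# In place: afterwards each tensor holds the element-wise sum over all
# GPUs in the group (op defaults to ReduceOp.SUM).
col.allreduce_multigpu(tensor_list)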
Code example #9
File: collective.py Project: yuan776/ray
def allgather_multigpu(output_tensor_lists: list,
                       input_tensor_list: list,
                       group_name: str = "default"):
    """Allgather tensors from each gpus of the group into lists.

    Args:
        output_tensor_lists (List[List[tensor]]): the gathered results, whose
            shape must be num_gpus * world_size * shape(tensor).
        input_tensor_list (List[tensor]): a list of tensors, with shape
            num_gpus * shape(tensor).
        group_name (str): the name of the collective group.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("Multigpu calls requires NCCL and Cupy.")
    _check_tensor_lists_input(output_tensor_lists)
    _check_tensor_list_input(input_tensor_list)
    g = _check_and_get_group(group_name)
    opts = types.AllGatherOptions()
    g.allgather(output_tensor_lists, input_tensor_list, opts)
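A minimal sketch following the shapes in the docstring (assumptions as before: ray.util.collective import path, pre-initialized two-process group, two GPUs per process):

import cupy as cp
import ray.util.collective as col

num_gpus, world_size = 2, 2

input_tensor_list = []    # num_gpus * shape(tensor)
output_tensor_lists = []  # num_gpus * world_size * shape(tensor)
for gpu in range(num_gpus):
    with cp.cuda.Device(gpu):
        input_tensor_list.append(cp.ones((8,), dtype=cp.float32))
        output_tensor_lists.append(
            [cp.zeros((8,), dtype=cp.float32) for _ in range(world_size)])

# After the call, each local GPU holds the tensors gathered from the
# whole group in its slot of output_tensor_lists.
col.allgather_multigpu(output_tensor_lists, input_tensor_list)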