# Imports required by the code below; `_check_device` is a module-private
# helper defined elsewhere in this file.
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_nccl_ops


def broadcast(tensor):
  """Returns a tensor that can be efficiently transferred to other devices.

  Args:
    tensor: The tensor to send; must be assigned to a GPU device.

  Returns:
    A tensor with the value of `tensor`, which can be used as input to ops
    on other GPU devices.
  """
  _check_device(tensor)

  with ops.device(tensor.device):
    return gen_nccl_ops.nccl_broadcast(input=tensor, shape=tensor.shape)
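# A minimal usage sketch for `broadcast`, kept as a comment so it does not
# execute at import time. It assumes a TF1-style graph with at least two
# visible GPUs; the tensor value and device strings are illustrative, not
# part of this module:
#
#   with ops.device('/gpu:0'):
#     t = constant_op.constant([1.0, 2.0, 3.0])
#     sent = broadcast(t)
#   with ops.device('/gpu:1'):
#     received = sent * 1.0  # consumes the broadcast value on another GPU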
@ops.RegisterGradient('NcclReduce')
def _reduce_sum_grad(op, grad):
  """The gradient for the inputs of a sum `NcclReduce` op.

  Args:
    op: The `NcclReduce` `Operation` that we are differentiating.
    grad: Gradient with respect to the output of the `reduce_sum` op.

  Returns:
    The gradient with respect to each input of the `reduce_sum` op.

  Raises:
    LookupError: If the `reduction` attribute of `op` is not `sum`.
  """
  if op.get_attr('reduction') != b'sum':
    raise LookupError('No gradient defined for NcclReduce except sum.')

  _check_device(grad, expected=op.device)

  with ops.device(op.device):
    result = gen_nccl_ops.nccl_broadcast(input=grad, shape=grad.shape)

  return [result] * len(op.inputs)
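# Why broadcasting is the right gradient, as a sketch: NcclReduce with
# reduction='sum' computes y = x_0 + x_1 + ... + x_{n-1}, so dy/dx_i = 1
# for every input, and each input's gradient is the same upstream `grad`.
# `nccl_broadcast` simply ships that one tensor to every participating
# device, and the returned list repeats it once per input of `op`.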