Example #1
def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter,
                          grad, ps_parameter):
    """
    Apply allreduce on gradient.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce (Primitive): The communication operator for gradients.
        allreduce_filter (bool): When it is true, allreduce would apply.
        grad (Tensor): The gradient tensor before operation.
        ps_parameter (bool): Use parameter server or not.

    Returns:
        Tensor, the gradient tensor after operation.
    """
    if ps_parameter:
        # Parameters handled by the parameter server are aggregated there,
        # so no collective communication is applied here.
        return grad

    if allreduce_filter:
        grad = allreduce(grad)
        if mean:
            # degree is documented as the mean coefficient, so it is applied directly.
            grad = F.tensor_mul(grad, F.cast(degree, F.dtype(grad)))
        return grad
    return grad
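
For intuition, here is a minimal NumPy sketch of the arithmetic this function performs, assuming (per the docstring) that degree carries the mean coefficient, e.g. 1/N for N devices. The worker list and simulated_allreduce helper are hypothetical stand-ins for the real collective:

import numpy as np

def simulated_allreduce(worker_grads):
    # AllReduce (sum): every worker ends up holding the same summed tensor.
    return sum(worker_grads)

# Made-up gradients from 4 workers for one parameter.
worker_grads = [np.full((2,), float(i + 1)) for i in range(4)]
degree = 1.0 / len(worker_grads)          # mean coefficient, as in the docstring

grad = simulated_allreduce(worker_grads)  # -> [10., 10.]
grad = grad * degree                      # mean step -> [2.5, 2.5]
print(grad)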
Example #2
def _tensors_allreduce_with_sparse_ps(degree, mean, allgather, allreduce,
                                      allreduce_filter, grad, ps_parameter):
    """
    Apply allgather on gradient instead of allreduce for sparse feature.
    Allgather is a communication operation used for distributed deep learning.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce (Primitive): The communication operator for gradients.
        allreduce_filter (bool): When it is true, allgather would apply.
        grad (tuple): The indices, gradient tensor and tensor_shape before operation.
        ps_parameter (bool): Use parameter server or not.

    Returns:
        RowTensor, the gradient after operation.
    """
    if ps_parameter:
        return grad

    if allreduce_filter:
        indices = allgather(grad.indices)
        dout = allgather(grad.values)
        if mean:
            dout = F.tensor_mul(dout, F.cast(degree, F.dtype(dout)))
        grad = RowTensor(indices, dout, grad.dense_shape)
    return grad
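
The sparse path can be pictured the same way, except that allgather concatenates each worker's (indices, values) pairs rather than summing dense tensors. A hedged NumPy sketch, with a made-up simulated_allgather in place of the real collective:

import numpy as np

def simulated_allgather(pieces):
    # AllGather: concatenate every worker's contribution along axis 0.
    return np.concatenate(pieces, axis=0)

# Two workers, each touching different rows of a (4, 2) embedding table.
worker_indices = [np.array([0, 2]), np.array([1, 2])]
worker_values = [np.ones((2, 2)), 2 * np.ones((2, 2))]

indices = simulated_allgather(worker_indices)  # [0, 2, 1, 2]
values = simulated_allgather(worker_values)    # stacked rows, duplicates kept
values = values * (1.0 / len(worker_values))   # mean step, mirroring the code
# (indices, values, dense_shape) is what RowTensor packages together.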
Example #3
def tensor_grad_scale(scale, grad, accu_grad):
    """Scale the accumulated gradient by 1/scale and clear the accumulation buffer."""
    # Undo the loss scaling on the accumulated gradient.
    new_grad = accu_grad * reciprocal(scale)
    # Zero out the accumulation buffer for the next accumulation cycle.
    zeros = F.tensor_mul(accu_grad, 0.0)
    clear = F.assign(accu_grad, zeros)
    # Graph-mode ordering: compute new_grad before the buffer is cleared,
    # and make sure the incoming grad is produced before new_grad.
    F.control_depend(new_grad, clear)
    F.control_depend(grad, new_grad)
    return new_grad
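
Numerically, the function amounts to dividing the accumulated gradient by the loss scale and zeroing the buffer; a small NumPy sketch of that arithmetic (buffer and scale values are made up):

import numpy as np

scale = 1024.0                          # loss scale used during the forward pass
accu_grad = np.array([2048.0, 512.0])   # gradient accumulated over micro-steps

new_grad = accu_grad * (1.0 / scale)    # undo the loss scaling -> [2.0, 0.5]
accu_grad[:] = 0.0                      # clear the buffer for the next cycle

In eager NumPy the statement order already guarantees what the two F.control_depend calls enforce in graph mode.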
Example #4
 def bprop(x, out, dout):
     if fusion == 0:
         # reduce_scatter hands each rank its own shard of the summed gradient.
         dx = reduce_scatter(dout)
     else:
         # Otherwise all_reduce the full gradient, then slice out this rank's shard.
         grad = all_reduce(dout)
         dx = split(grad)[rank]
         if mean_flag:
             # Average across devices.
             dx = F.tensor_mul(dx, scale)
     return (dx, )
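
The two branches compute the same shard: reduce_scatter is equivalent to an all_reduce followed by taking this rank's slice, it just moves less data. A NumPy sketch of that equivalence (world size, rank, and shapes are arbitrary):

import numpy as np

world, rank = 4, 1
douts = [np.arange(8.0) + i for i in range(world)]   # per-rank upstream grads

def simulated_all_reduce(tensors):
    return sum(tensors)                        # every rank holds the full sum

def simulated_reduce_scatter(tensors, r):
    return np.split(sum(tensors), world)[r]    # rank r holds only its shard

dx_rs = simulated_reduce_scatter(douts, rank)
dx_ar = np.split(simulated_all_reduce(douts), world)[rank]
assert np.allclose(dx_rs, dx_ar)               # both branches yield the same shard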
Example #5
 def bprop(x, z, out, dout):
     if do_mirror:
         if mean_flag:
             # Accumulate the incoming gradient into the buffer z, then reduce.
             z = F.depend(z, F.assign_add(z, dout))
             grad = all_reduce(z)
             dx = split(grad)[rank]
             # Average across devices.
             dx = F.tensor_mul(dx, scale)
         else:
             z = F.depend(z, F.assign_add(z, dout))
             grad = all_reduce(z)
             dx = split(grad)[rank]
     else:
         dx = dout
     return (dx, zeros_like(z))
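
The assign_add line folds each incoming gradient into the persistent buffer z before the collective runs; F.depend only pins that ordering in the graph. A tiny NumPy sketch of the accumulation step over two micro-steps (values are illustrative):

import numpy as np

z = np.zeros(4)                             # persistent accumulation buffer
for dout in (np.ones(4), 2 * np.ones(4)):   # two micro-steps
    z += dout                               # assign_add: accumulate before reducing
# After accumulation, z (not dout) is what gets all_reduced and sliced,
# exactly as the branches above do.
print(z)                                    # [3. 3. 3. 3.]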
Example #6
def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter,
                       grad):
    """
    Apply allreduce on gradient.

    Args:
        degree (int): The mean coefficient.
        mean (bool): When mean is true, the mean coefficient (degree) would apply on gradients.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce (Primitive): The communication operator for gradients.
        allreduce_filter (bool): When it is true, allreduce would apply.
        grad (Tensor): The gradient tensor before operation.

    Returns:
        Tensor, the gradient tensor after operation.
    """
    if allreduce_filter:
        grad = allreduce(grad)
        if mean:
            # Turn the allreduce sum into a mean by multiplying by 1/degree.
            degree = F.scalar_cast(degree, F.dtype(grad))
            grad = F.tensor_mul(
                grad, F.cast(F.scalar_to_array(1.0 / degree), F.dtype(grad)))
        return grad
    return grad
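
A quick mock invocation makes the gating explicit: with allreduce_filter false the gradient passes through untouched, and with it true the summed gradient is averaged by 1/degree. This sketch swaps the real MindSpore collectives and functional ops for plain-Python stand-ins, so it mirrors only the control flow, not the framework APIs:

import numpy as np

def tensors_allreduce_sketch(degree, mean, allreduce, allreduce_filter, grad):
    # Pure-Python analogue of the control flow above.
    if allreduce_filter:
        grad = allreduce(grad)
        if mean:
            grad = grad * (1.0 / degree)
    return grad

fake_allreduce = lambda g: g * 8    # stand-in: sum over 8 identical workers
g = np.array([1.0, 2.0])
print(tensors_allreduce_sketch(8, True, fake_allreduce, True, g))   # [1. 2.]  summed, then averaged
print(tensors_allreduce_sketch(8, False, fake_allreduce, True, g))  # [8. 16.] summed only
print(tensors_allreduce_sketch(8, True, fake_allreduce, False, g))  # [1. 2.]  filtered out, untouched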
Example #7
 def construct(self, t1, t2):
     z = F.tensor_mul(t1, t2)
     return z
Example #8
 def construct(self, t):
     z = F.tensor_mul(t, self.f)
     return z
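
Examples #7 and #8 are the simple elementwise case: F.tensor_mul inside a Cell's construct. A minimal usage sketch, assuming a MindSpore 1.x environment where F.tensor_mul is exposed in mindspore.ops.functional (the class name and tensor values are made up):

import numpy as np
import mindspore as ms
from mindspore import Tensor, nn
from mindspore.ops import functional as F

class MulNet(nn.Cell):
    def construct(self, t1, t2):
        # Elementwise product, as in examples #7 and #8.
        return F.tensor_mul(t1, t2)

net = MulNet()
t1 = Tensor(np.ones((2, 2)), ms.float32)
t2 = Tensor(np.full((2, 2), 3.0), ms.float32)
print(net(t1, t2))   # 2x2 tensor filled with 3.0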