Example #1
# Assumed imports for this snippet (not shown in the original):
# from mindspore import IndexedSlices  # renamed RowTensor in later releases
# from mindspore.ops import functional as F
# from mindspore.ops import operations as P
def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce_filter,
                                   grad, allreduce):
    """
    Apply allgather to the gradient instead of allreduce for sparse features.
    Allgather is a communication operation used for distributed deep learning.

    Args:
        degree (int): The mean coefficient; usually equal to the device number.
        mean (bool): When true, the mean coefficient (degree) is applied to the gradient.
        allgather (Primitive): The communication operator for sparse gradients.
        allreduce_filter (bool): When true, allgather is applied.
        grad (IndexedSlices): The gradient before operation.
        allreduce (Primitive): The communication operator for gradients.

    Returns:
        IndexedSlices, the gradient after operation.
    """
    if allreduce_filter:
        indices = allgather(grad.indices())
        dout = allgather(grad.values())
        if mean:
            degree = F.scalar_cast(degree, F.dtype(grad.values()))
            cast_op = P.Cast()
            mul_op = P.Mul()
            dout = mul_op(
                dout, cast_op(F.scalar_to_array(1.0 / degree), F.dtype(dout)))
        grad = IndexedSlices(indices, dout, grad.dense_shape())
    return grad
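For intuition, here is a minimal NumPy analogue of what this function computes; the allgather is simulated by concatenating per-device (indices, values) pairs, and IndexedSlices is modeled as a plain tuple (both are assumptions for illustration, not MindSpore API):

import numpy as np

def allreduce_sparse_numpy(degree, mean, per_device_grads):
    """NumPy sketch: 'allgather' sparse grads from all devices, then average."""
    # Simulate allgather by concatenating indices and values from every device.
    indices = np.concatenate([g[0] for g in per_device_grads])
    values = np.concatenate([g[1] for g in per_device_grads])
    if mean:
        values = values * (1.0 / degree)  # apply the mean coefficient
    return indices, values

# Two devices, each holding a sparse gradient that touches two embedding rows.
g0 = (np.array([0, 2]), np.ones((2, 4)))
g1 = (np.array([2, 3]), np.ones((2, 4)))
idx, vals = allreduce_sparse_numpy(2, True, [g0, g1])
print(idx)      # [0 2 2 3] -- duplicate indices are legal; rows sum implicitly
print(vals[0])  # [0.5 0.5 0.5 0.5]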
Example #2
# op_add and op_gather are assumed module-level primitives, as in MindSpore's
# optimizer code: op_add = P.AddN(); op_gather = P.GatherV2()
def _tensor_apply_decay_with_sparse(weight_decay, if_apply, weight, gradient):
    """Apply weight decay to the sparse gradient's values."""
    if if_apply:
        indices = gradient.indices()
        values = op_add((op_gather(weight, indices, 0) * weight_decay, gradient.values()))
        shape = gradient.dense_shape()
        return IndexedSlices(indices, values, shape)
    return gradient
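In NumPy terms (a sketch with illustrative names), the decay only touches the rows that actually appear in the sparse gradient:

import numpy as np

def sparse_weight_decay_numpy(weight_decay, weight, indices, values):
    """NumPy sketch: add weight_decay * weight[row] to each sparse-gradient row."""
    # weight[indices] plays the role of op_gather; + plays the role of op_add.
    return values + weight[indices] * weight_decay

weight = np.arange(12.0).reshape(4, 3)
indices = np.array([1, 3])
values = np.zeros((2, 3))
print(sparse_weight_decay_numpy(0.1, weight, indices, values))
# row 1 -> [0.3 0.4 0.5], row 3 -> [0.9 1.0 1.1]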
Example #3
def _tensors_cast_datatype_with_sparse(datatype, grad):
    """
    Cast gradient to datatype.

    Args:
        datatype (mstype): The destination data type of the gradient.
        grad (IndexedSlices): The gradient before operation.

    Returns:
        IndexedSlices, the gradient after operation.
    """
    dout = F.cast(grad.values(), datatype)
    return IndexedSlices(grad.indices(), dout, grad.dense_shape())
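Note that only the values tensor changes dtype; the integer indices and the dense shape are untouched. A tiny NumPy sketch of the same idea:

import numpy as np

indices = np.array([0, 2])                  # stays integer
values = np.array([[1.0, 2.0], [3.0, 4.0]])
cast_values = values.astype(np.float16)     # analogue of F.cast(grad.values(), datatype)
print(indices.dtype, cast_values.dtype)     # e.g. int64 float16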
Example #4
def tensor_grad_scale_with_sparse(scale, grad):
    """Get grad with scale."""
    if scale == 1.0:
        return grad
    return IndexedSlices(grad.indices(), grad.values() * scale, grad.dense_shape())
def bprop(x, indices, axis, out, dout):
    # Gradient w.r.t. x stays sparse: wrap dout into an IndexedSlices at the
    # gathered indices; the trailing returns are placeholders for non-differentiable inputs.
    return IndexedSlices(indices, dout, x), axis, out
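Why the sparse form suffices: IndexedSlices carries the dense shape of x as its third argument, and scatter-adding dout back into a zero tensor at the gathered indices reproduces the dense gradient. A NumPy sketch of that equivalence (np.add.at sums duplicate indices, matching the implicit row summation of IndexedSlices):

import numpy as np

x = np.zeros((4, 3))
indices = np.array([1, 1, 3])         # duplicates: row 1 was gathered twice
dout = np.ones((3, 3))                # incoming gradient, one row per gathered row

dense_grad = np.zeros_like(x)
np.add.at(dense_grad, indices, dout)  # scatter-add dout rows at the indices
print(dense_grad[1])                  # [2. 2. 2.] -- duplicates summed
print(dense_grad[3])                  # [1. 1. 1.]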