Example 1
def laplacian_of_gaussian_ad(head, x):
    """2nd derivative of gaussian, which should be the same as laplacian of gaussian filter."""
    y = gaussian(x)
    # 1st derivative
    dx = list(akg.differentiate(y, [x], head))
    head_fake = akg.tvm.compute(x.shape,
                                lambda *ind: akg.tvm.const(1.0, dtype=y.dtype))
    # 2nd derivative
    dx2 = list(akg.differentiate(dx[0], [x], head_fake))
    return dx2[0]
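
A minimal call sketch (the shape, dtype, and the interface of the gaussian helper are assumptions for illustration, not taken from the source):

x = akg.tvm.placeholder((1024,), "float32", "x")
head = akg.tvm.placeholder((1024,), "float32", "head")
# Second derivative of gaussian(x) with respect to x.
d2y_dx2 = laplacian_of_gaussian_ad(head, x)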
Example 2
def bias_add_ad_v2(head, input_shape, data_format, target=utils.CCE):
    """Compute gradient for bias_add operator using automatic differentiate."""
    check_list = ["NHWC", "NC1HWC0", "DefaultFormat"]
    if data_format not in check_list:
        raise RuntimeError(
            "bias_add_grad only supports %s while data_format is %s" %
            (",".join(check_list), data_format))
    head_plh = akg.tvm.placeholder(head.shape, head.dtype, "head_plh")
    if data_format == "NC1HWC0":
        bias_shape = (1, head.shape[1], 1, 1, head.shape[4])
        bias_plh = akg.tvm.placeholder(bias_shape, head.dtype, "bias_plh")
    elif data_format == "NHWC":
        bias_shape = (input_shape[-1], )
        bias_plh = akg.tvm.placeholder(bias_shape, head.dtype, "bias_plh")
    else:
        bias_shape = (input_shape[1], )
        bias_plh = akg.tvm.placeholder(bias_shape, head.dtype, "bias_plh")
    bias_add_res = bias_add(head_plh, bias_plh, data_format)

    shape1 = [x.value for x in head_plh.shape]
    shape2 = [x.value for x in bias_plh.shape]

    def custom_bias_add_diff(out, input_data, head, ad_attrs, new_pld_array):
        if len(shape2) != 1:
            raise RuntimeError("Default Format needs Bias is a 1D Tensor!")
        if data_format == "NHWC":
            return [akg.tvm.compute(shape2, lambda l: head[0, 0, 0, l])]
        if data_format == "DefaultFormat":
            if len(shape1) == 2:
                return [akg.tvm.compute(shape2, lambda l: head[0, l])]
            if len(shape1) == 4:
                return [akg.tvm.compute(shape2, lambda l: head[0, l, 0, 0])]
            raise RuntimeError(
                "bias_add only support 2D and 4D shape while dataformat is DefaultFormat"
            )
        return None

    if data_format == "NC1HWC0":
        jacs = list(akg.differentiate(bias_add_res, [bias_plh], head))
    else:
        variables = akg.get_variables("reshape_diff")
        jacs = list(
            akg.differentiate(
                bias_add_res, [bias_plh],
                head,
                None,
                None,
                override={variables[0]: (variables[1], custom_bias_add_diff)}))

    return jacs[0]
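
A usage sketch for the two dispatch paths above (shapes and dtypes are illustrative assumptions):

# NHWC path: differentiation goes through the custom_bias_add_diff override.
head_nhwc = akg.tvm.placeholder((32, 14, 14, 64), "float16", "head")
dbias_nhwc = bias_add_ad_v2(head_nhwc, input_shape=(32, 14, 14, 64), data_format="NHWC")

# NC1HWC0 path: plain akg.differentiate with no override.
head_5d = akg.tvm.placeholder((32, 4, 14, 14, 16), "float16", "head")
dbias_5d = bias_add_ad_v2(head_5d, input_shape=(32, 4, 14, 14, 16), data_format="NC1HWC0")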
Example 3
def lstmcell_c_ad(_input,
                  hx,
                  cx,
                  w_ih,
                  w_hh,
                  b_ih,
                  b_hh,
                  Head,
                  input_id,
                  target="cce"):
    _, forward_c_op = lstmcell(_input, hx, cx, w_ih, w_hh, b_ih, b_hh)

    tensor_list = [_input, hx, cx, w_ih, w_hh, b_ih, b_hh]
    _jacs = list(akg.differentiate(forward_c_op, [tensor_list[input_id]],
                                   Head))

    ###################################################
    # Need to disable CSE due to stmt dense() + dense()
    attrs = dict()
    attrs['disable_cse'] = True
    attrs['to_three_address_reuse'] = True
    attrs['to_three_address_min_split'] = 10
    ###################################################

    return _jacs[0], attrs
Example 4
def tanh_ad(head, in_data):
    """
    Compute gradient of tanh operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32.
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor with the same shape as the input.
    """
    in_dtype = in_data.dtype

    # On cloud environments, casting from 'float16' to 'float32' and casting
    # the result back to 'float16' afterwards achieves higher precision.
    if in_dtype == 'float16' and not utils.product_is_mini():
        in_data = akg.topi.cast(in_data, "float32")
        head = akg.topi.cast(head, "float32")

    out_data = tanh.tanh(in_data)
    jacs = list(akg.differentiate(out_data, [in_data], head))
    jacs_res = jacs[0]
    if in_dtype == 'float16' and not utils.product_is_mini():
        jacs_res = akg.topi.cast(jacs_res, 'float16')
    return jacs_res
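
A usage sketch for the float16 cast-up path (the shape is an assumption):

x = akg.tvm.placeholder((8, 128), "float16", "x")
head = akg.tvm.placeholder((8, 128), "float16", "head")
# On non-mini targets the gradient is computed in float32 and cast back to float16.
dx = tanh_ad(head, x)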
Example 5
def bias_add_ad(head, input_shape, data_format):
    """
    Compute gradient for bias_add operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor.
        input_shape (Union[list, tuple]): Shape of the input to the forward bias_add operator.
        data_format (str): Data format of input tensors.

    Returns:
        tvm.tensor.Tensor of same shape and type as head.
    """

    check_list = ["NHWC", "NC1HWC0", "DefaultFormat"]
    if data_format not in check_list:
        raise RuntimeError("bias_add_grad only support %s while dataformat is %s" % (",".join(check_list), data_format))
    vc_util.check_shape(head.shape)
    shape1 = [x.value for x in head.shape]
    vc_util.davinci_format_check(shape1, data_format)
    a = akg.tvm.placeholder(head.shape, head.dtype, "A")
    if data_format == "NC1HWC0":
        bias_shape = (1, head.shape[1], 1, 1, head.shape[4])
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    elif data_format == "NHWC":
        bias_shape = (input_shape[-1],)
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    else:
        bias_shape = (input_shape[1],)
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    c = bias_add.bias_add(a, b, data_format)

    jacs = list(akg.differentiate(c, [b], head))
    attrs = {}
    return jacs[0], attrs
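
Unlike bias_add_ad_v2 above, this variant returns a (gradient, attrs) pair; a minimal call sketch (shape assumed):

head = akg.tvm.placeholder((32, 14, 14, 64), "float16", "head")
dbias, attrs = bias_add_ad(head, input_shape=(32, 14, 14, 64), data_format="NHWC")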
Example 6
def logsoftmax_ad(shape, dtype, axis, kernel_name, attrs):
    """Compute the gradient of logsoftmax by autodiff."""
    check_list = ["float16"]
    if not dtype.lower() in check_list:
        raise RuntimeError("logsoftmax test only support %s while dtype is %s" % (",".join(check_list), dtype))
    # check_shape(shape)
    if axis < 0:
        axis = len(shape) + axis
    if axis >= len(shape):
        raise RuntimeError("axis should be less than dimension")
    if axis != len(shape) - 1:
        raise RuntimeError("Only support the last axis currently")

    shape_new = [shape[-2], shape[-1]]
    if len(shape) > 2:
        for i in range(len(shape) - 2):
            shape_new[0] = shape_new[0] * shape[i]
    shape = shape_new

    a_up = akg.tvm.placeholder(shape, dtype=dtype, name="input")
    b_up = logsoftmax.logsoftmax_op(a_up, shape, axis)

    head = akg.tvm.placeholder(b_up.shape, name="head", dtype=dtype)
    _jacs = list(akg.differentiate(b_up, [a_up], head))
    sjac = akg.tvm.create_schedule([_jacs[0].op])
    sjac[_jacs[0].op.input_tensors[1]].compute_inline()
    op_vars = [head, a_up, _jacs[0]]

    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(sjac, op_vars, "cce", name="test2", attrs=attrs, polyhedral=True)
        return mod
Example 7
def rnncell_relu_ad(inputs, hidden, w_ih, w_hh, b_ih, b_hh, Head, input_id):
    forward_op = rnn_relu_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh)

    tensor_list = [inputs, hidden, w_ih, w_hh, b_ih, b_hh]
    _jacs = list(akg.differentiate(forward_op, [tensor_list[input_id]], Head))

    return _jacs[0]
Example 8
def abs_ad(head, in_data):
    """
    Compute gradient of abs operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32, int8, uint8, int32.
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32, int8, uint8, int32.

    Returns:
        tvm.tensor.Tensor with the same shape as the input.
    """

    dtype = in_data.dtype
    # check validity of head's shape and dtype.
    vc_util.check_shape(head.shape)
    vc_util.ops_dtype_check(head.dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    need_cast_dtype = ["int8", "int32", "uint8"]

    abs_data = abs.abs_value(in_data)
    if head.dtype in need_cast_dtype:
        head = akg.tvm.compute(head.shape, lambda *indice: head(*indice).astype("float16"), name='head_cast')
    if dtype in need_cast_dtype:
        abs_data = akg.tvm.compute(abs_data.shape,
                                   lambda *indice: abs_data(*indice).astype("float16"),
                                   name='abs_cast')
    jacs = list(akg.differentiate(abs_data, [in_data], head))
    if dtype in need_cast_dtype:
        jacs[0] = akg.tvm.compute(jacs[0].shape, lambda *indice: jacs[0](*indice).astype(dtype), name='res')
    return jacs[0]
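
A sketch of the integer path, where head and the forward result are cast to float16 before differentiation and the gradient is cast back afterwards (shape assumed):

x = akg.tvm.placeholder((16, 16), "int32", "x")
head = akg.tvm.placeholder((16, 16), "int32", "head")
dx = abs_ad(head, x)  # returned gradient has dtype int32 again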
Example 9
def matmul_ad(data_shape, weight_shape, dtype, attrs=None):
    check_list = ["float16"]
    if not (dtype.lower() in check_list):
        raise RuntimeError("matmul test only support %s while dtype is %s" %
                           (",".join(check_list), dtype))
    # check_shape(shape)
    assert (len(data_shape) == 2)
    assert (len(weight_shape) == 2)
    assert (data_shape[1] == weight_shape[0])

    m, k = data_shape
    _, n = weight_shape

    a = akg.tvm.placeholder((m, k), name='a', dtype=dtype)
    b = akg.tvm.placeholder((k, n), name='b', dtype=dtype)
    kk = akg.tvm.reduce_axis((0, k), name='kk')
    c = akg.tvm.compute(
        (m, n),
        lambda i, j: akg.lang.ascend.mmad(a[i, kk] * b[kk, j], axis=kk),
        name="c")

    head = akg.tvm.placeholder(c.shape, name="Head", dtype='float16')
    _jacs = list(akg.differentiate(c, [a], head))
    sjac = akg.tvm.create_schedule([_jacs[0].op])
    op_vars = [head, b, _jacs[0]]

    with akg.build_config(add_lower_pass=debug_mode(0), dump_pass_ir=True):
        mod = akg.build(sjac,
                        op_vars,
                        "cce",
                        name="test2",
                        attrs=attrs,
                        polyhedral=True)
        return mod
Example 10
def minimum_ad(head, data_x, data_y, grad_x=True, grad_y=True):
    """
    Compute the backward (gradient) outputs of the minimum operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor of type float32, float16 or int32.
        data_x (tvm.tensor.Tensor): Input tensor of type float32, float16 or int32.
        data_y (tvm.tensor.Tensor): Input tensor of type float32, float16 or int32.
        grad_x (bool): Whether to differentiate with respect to x. Default is True.
        grad_y (bool): Whether to differentiate with respect to y. Default is True.

    Returns:
        tvm.tensor.Tensor(s) with the same type and shape as the inputs. If both grad_x and grad_y are True,
        a pair [jacs[0], jacs[1]] is returned.
    """
    utils.elemwise_shape_check(data_x.shape, data_y.shape)
    utils.elemwise_shape_check(head.shape, data_x.shape)
    utils.elemwise_dtype_check(
        data_x.dtype, head.dtype,
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    utils.elemwise_dtype_check(
        data_x.dtype, data_y.dtype,
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    if not grad_x and not grad_y:
        raise ValueError("At least one of grad_x and grad_y is True.")
    op = minimum(data_x, data_y)
    jacs = list(akg.differentiate(op, [data_x, data_y], head))
    if grad_x and grad_y:
        return jacs[0], jacs[1]
    if grad_x:
        return jacs[0]
    return jacs[1]
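
A sketch of the three return patterns (shapes and dtype are assumptions):

x = akg.tvm.placeholder((32, 32), "float32", "x")
y = akg.tvm.placeholder((32, 32), "float32", "y")
head = akg.tvm.placeholder((32, 32), "float32", "head")
dx, dy = minimum_ad(head, x, y)                 # both gradients
dx_only = minimum_ad(head, x, y, grad_y=False)  # gradient w.r.t. x only
dy_only = minimum_ad(head, x, y, grad_x=False)  # gradient w.r.t. y only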
Example 11
def softmax_ad_optimized(head, data, axis=-1):
    """
    Computes the autodiff of softmax.

    Args:
        head (tvm.tensor.Tensor): Original differentiation values.
        data (tvm.tensor.Tensor): Input of softmax.
        axis (int): Along which axis softmax is performed.

    Returns:
        tvm.tensor.Tensor, the overall differentiation values.
    """
    def get_shape(pld):
        return [d.value for d in pld.shape]

    def temp_compute(shape, grad, sftmx_fwd, *indices):
        shp_len = len(shape)
        grad_index = indices[:(shp_len - 2)] + indices[-1:]
        sftmx_fwd_index = indices[:-1]
        temp = grad(*grad_index) * akg.tvm.expr.Select(
            indices[-1] == indices[-2],
            sftmx_fwd(*sftmx_fwd_index) * (1 - sftmx_fwd(*sftmx_fwd_index)),
            -sftmx_fwd(*sftmx_fwd_index) * sftmx_fwd(*grad_index))
        return temp

    def temp_sum_compute(shape, temp, *indices):
        kk = akg.tvm.reduce_axis((0, shape[-1]), name='kk')
        index = indices[:] + (kk, )
        temp_sum = akg.tvm.sum(temp(*index), axis=kk)
        return temp_sum

    def custom_softmax_fdiff(out, inputs, grad, ad_attrs, new_pld_array):
        data = inputs[0]
        shape = get_shape(data)
        sftmx_fwd = Softmax(data, -1)[0]
        shape.append(shape[-1])

        temp = akg.tvm.compute(
            shape,
            lambda *indices: temp_compute(shape, grad, sftmx_fwd, *indices),
            name="softmax_select2")
        temp_sum = akg.tvm.compute(
            shape[:-1],
            lambda *indices: temp_sum_compute(shape, temp, *indices),
            name="softmax_ad2")
        return [temp_sum]

    l_up = Softmax(data, axis)[0]

    # For the larger expression tree's dl w.r.t. data (where softmax may be embedded inside), the default fdiff is used.
    # For softmax's own dl w.r.t. data, custom_softmax_fdiff is used via the override.
    # Here the overridden tensor happens to be l_up itself and its input happens to be data, but this need not be the case.
    [dl_ddata
     ] = akg.differentiate(l_up, [data],
                           head,
                           None,
                           None,
                           override={l_up: ([data], custom_softmax_fdiff)})
    attrs = {}
    return dl_ddata, attrs
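
A call sketch; the override maps the softmax output l_up to custom_softmax_fdiff, so the default differentiation rule is bypassed for that node (shape and dtype assumed):

data = akg.tvm.placeholder((8, 32), "float16", "data")
head = akg.tvm.placeholder((8, 32), "float16", "head")
dsoftmax, attrs = softmax_ad_optimized(head, data, axis=-1)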
Example 12
def gelu_ad_custom(head, in_data, target="cce"):
    """
    Automatic differentiation of gelu with a customized differentiation function.

    To achieve higher precision, the tanh part is differentiated with a self-defined, simplified calculation.
    """
    dtype = in_data.dtype
    const1 = akg.tvm.const(0.044715, dtype)
    const2 = akg.tvm.const(0.7978845, dtype)
    const3 = akg.tvm.const(0.1070322, dtype)
    tmp0 = akg.topi.multiply(in_data, in_data)
    pow0 = akg.topi.multiply(tmp0, in_data)
    mul0 = pow0 * const1
    add0 = in_data + mul0
    mul1 = add0 * const2
    tanh_res = Tanh(mul1)
    add1 = tanh_res + akg.tvm.const(1, dtype)
    mul2 = add1 * akg.tvm.const(0.5, dtype)
    mul3 = in_data * mul2
    res = mul3

    def gelu_diff(out, inp, head, ad_attrs, new_array_pld):
        temp = tanh_fdiff(head, mul1)
        return [
            temp * (akg.tvm.const(0.7978845, dtype) + const3 * inp[0] * inp[0])
        ]

    jacs = list(
        akg.differentiate(res, [in_data],
                          head,
                          None,
                          None,
                          override={tanh_res: ([in_data], gelu_diff)}))
    return jacs[0]
Example 13
def blas_axby_ad(head, alpha, beta):
    """Compute gradient of blas_axby operator using automatic differentiate."""
    x = akg.tvm.placeholder(head.shape, head.dtype, "inputx")
    y = akg.tvm.placeholder(head.shape, head.dtype, "inputy")
    op = blas_axby.blas_axby(x, y, alpha, beta)
    jacs = list(akg.differentiate(op, [x, y], head))
    return jacs[0], jacs[1]
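
A call sketch; assuming blas_axby computes alpha * x + beta * y, the two returned gradients reduce to alpha * head and beta * head by linearity (shape assumed):

head = akg.tvm.placeholder((64,), "float16", "head")
dx, dy = blas_axby_ad(head, alpha=2.0, beta=0.5)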
Example 14
def erf_ad(head, x):
    """Compute gradient of erf operator using automatic differentiate."""
    if utils.product_is_mini():
        raise RuntimeError("Not support erf_ad on mini device.")
    output = erf.erf(x)
    jacs = list(akg.differentiate(output, [x], head))
    return jacs[0]
Example 15
def sparse_softmax_cross_entropy_with_logits_ad(labels,
                                                logits,
                                                reduction='mean',
                                                grad_scale=1.0):
    """Compute gradient for sparse_softmax_cross_entropy_with_logits operator using automatic differentiate."""
    attr_map = {}

    def custom_softmax_cross_entropy_with_logits_fdiff(out, inputs, grad,
                                                       attrs, new_pld_array):
        strategy, _, backprop = loss.sparse_softmax_cross_entropy_with_logits_impl(
            inputs[1], inputs[0], reduction=reduction, scale=grad_scale)
        if strategy:
            attr_map["custom_tiling"] = strategy
        return [backprop]

    l_value, _ = loss.sparse_softmax_cross_entropy_with_logits(
        labels, logits, reduction)
    head = akg.tvm.compute(l_value.shape,
                           lambda *i: akg.tvm.const(1.0, l_value.dtype),
                           name='head')
    [dl_dlogits
     ] = akg.differentiate(l_value, [logits],
                           head,
                           None,
                           None,
                           override={
                               l_value:
                               ([logits, labels],
                                custom_softmax_cross_entropy_with_logits_fdiff)
                           })
    return dl_dlogits, attr_map
Example 16
def reduce_max_ad_optimized(head, data, axis, keepdims, target="cce"):
    def get_shape(pld):
        return [d.value for d in pld.shape]

    def custom_reduce_max_fdiff(out, inputs, grad, ad_attrs, new_pld_array):
        data = inputs[0]
        shape = get_shape(data)
        max_ = akg.lang.ascend.reduce_max(data, axis=axis, keepdims=keepdims)
        max_broadcast = akg.lang.ascend.broadcast(max_, shape)
        return [
            akg.tvm.compute(shape,
                            lambda *indices: akg.tvm.expr.Select(
                                data(*indices) == max_broadcast(*indices),
                                grad(*get_reduced_indices(
                                    *indices, axis=axis, keepdims=keepdims)),
                                akg.tvm.const(0, dtype=data.dtype)),
                            name="reduce_max_ad2")
        ]

    l = reduce_max(data, axis, keepdims, target=target)

    [dl_ddata
     ] = akg.differentiate(l, [data],
                           head,
                           None,
                           None,
                           override={l: ([data], custom_reduce_max_fdiff)})
    return dl_ddata
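
A call sketch; head must have the shape of the reduced output, and the custom rule routes it only to positions where data equals the broadcast maximum (shapes, dtype, and axis are assumptions):

data = akg.tvm.placeholder((4, 16), "float16", "data")
head = akg.tvm.placeholder((4,), "float16", "head")  # shape of reduce_max(data, axis=1, keepdims=False)
dx = reduce_max_ad_optimized(head, data, axis=(1,), keepdims=False)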
Example 17
def avgpool_ad(head, data, kernel, stride, pad):
    """Compute gradient of avgpool operator using automatic differentiate."""
    attrs = {
        "enable_post_poly_loop_partition": False,
        "enable_pre_poly_loop_partition": False
    }
    avgpool_fwd, _ = avgpool(data, kernel, stride, pad)
    [dl_ddata] = akg.differentiate(avgpool_fwd, [data], head)
    return dl_ddata, attrs
Example 18
def conv_input_ad(input_ad_inputs,
                  fmap_shape,
                  filter_shape,
                  pad_,
                  stride_,
                  dilation_,
                  attrs=None):
    """
    Compute dx according to "conv forward".

    Args:
        input_ad_inputs (list[tvm.tensor.Tensor]): a list of length 2.
              input_ad_inputs[0] (treated as dy): Tensor of type float16, 5-D shape (out_n, out_c//C0, out_h, out_w, C0).
              input_ad_inputs[1] (treated as w): Tensor of type float16, 4-D shape (wC//C0*wH*wW, wN//C0, C0, C0).
        fmap_shape (list): [fN, fC, fH, fW]
        filter_shape (list): [wN, wC, wH, wW]
        pad_ (list): [pad_left, pad_right, pad_top, pad_bottom]
        stride_ (list): [stride_h, stride_w]
        dilation_ (list): [dilation_h, dilation_w]
        attrs (dict): a dict with keys such as conv_tile, bypass, etc.

    Returns:
        tvm.tensor.Tensor, configs.
    """

    backward_dy, forward_w = input_ad_inputs

    in_n, in_c, in_h, in_w = fmap_shape
    block_size = 16
    in_c = (in_c + block_size - 1) // block_size * block_size
    x_5d_shape = (in_n, in_c // block_size, in_h, in_w, block_size)

    forward_x = akg.tvm.placeholder(x_5d_shape, forward_w.dtype, "input_X")
    original_filter_shape = akg.tvm.placeholder(filter_shape, forward_w.dtype,
                                                "input_filter")
    forward_output, _ = conv_forward.conv([forward_x, forward_w],
                                          fmap_shape,
                                          filter_shape,
                                          pad_,
                                          stride_,
                                          dilation_,
                                          use_bias=False,
                                          attrs=attrs)

    ad_attrs = {"ad_conv_enable": 1, "ad_conv_reuse_conv": 0}
    jacs = list(
        akg.differentiate(forward_output, [forward_x], backward_dy, ad_attrs,
                          [backward_dy, forward_w, original_filter_shape]))
    configs = conv_input_ad_config([backward_dy, forward_w],
                                   fmap_shape,
                                   filter_shape,
                                   pad_,
                                   stride_,
                                   dilation_,
                                   attrs=attrs)

    return jacs[0], configs
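
A call sketch with illustrative shapes (N=1, C=16, H=W=28, sixteen 3x3 filters, stride 1, pad 1, C0=16); the 5-D dy and fractal 4-D w layouts follow the docstring above:

dy = akg.tvm.placeholder((1, 1, 28, 28, 16), "float16", "dy")
w = akg.tvm.placeholder((9, 1, 16, 16), "float16", "w")
dx, configs = conv_input_ad([dy, w],
                            fmap_shape=[1, 16, 28, 28],
                            filter_shape=[16, 16, 3, 3],
                            pad_=[1, 1, 1, 1],
                            stride_=[1, 1],
                            dilation_=[1, 1])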
Example 19
def maxpool_ad_no_custom_diff_poly_all_max(head, data, kernel, stride, pad):
    """automatic differentiate of maxpool with polyhedral"""
    attrs = {
        "enable_post_poly_loop_partition": False,
        "enable_pre_poly_loop_partition": False
    }
    maxpool_fwd = maxpool.old_maxpool(data, kernel, stride, pad)
    [dl_ddata] = akg.differentiate(maxpool_fwd, [data], head, None, None)
    return dl_ddata, attrs
Example 20
def smooth_l1_loss_ad(head,
                      prediction,
                      target,
                      anchor_samples,
                      anchor_sample_correct=0,
                      delta=1.0):
    b = smooth_l1_loss.smooth_l1_loss(prediction, target, anchor_samples,
                                      anchor_sample_correct, delta)
    _jacs = list(akg.differentiate(b[0], [prediction], head))
    return _jacs[0]
Example 21
def roi_align_ad(head,
                 data,
                 rois,
                 pooled_size,
                 spatial_scale,
                 sample_ratio,
                 target="cce"):
    output = akg.topi.vision.rcnn.roi_align.roi_align_nchw(
        data, rois, pooled_size, spatial_scale, sample_ratio)
    _jacs = list(akg.differentiate(output, [data], head))
    return _jacs[0]
Example 22
def triplet_loss_ad(head,
                    anchor_output,
                    positive_output,
                    negative_output,
                    margin=1.0,
                    input_id=0):
    if not ((input_id >= 0) and (input_id <= 2)):
        raise RuntimeError("Error: input_id should be 0, 1 or 2 only!")
    fwd = triplet_loss_naive(anchor_output, positive_output, negative_output,
                             margin)

    if (input_id == 0):
        _jacs = list(
            akg.differentiate(
                fwd, [anchor_output, positive_output, negative_output], head))
    elif (input_id == 1):
        _jacs = list(akg.differentiate(fwd, [positive_output], head))
    else:
        _jacs = list(akg.differentiate(fwd, [negative_output], head))
    return _jacs[0]
Example 23
def mean_ad(head, input_shape, axis, keepdims):
    """mean autodiff."""
    tensor_a = tvm.placeholder(input_shape, head.dtype, "A")
    tensor_b = mean.mean(tensor_a, axis, keepdims)

    # remove useless mean_output
    if isinstance(tensor_b, tuple):
        tensor_b = tensor_b[0]
    if tensor_b.op.name == "mean_output":
        tensor_b = tensor_b.op.input_tensors[0]

    jacs = list(akg.differentiate(tensor_b, [tensor_a], head))
    return jacs[0]
Example 24
def elu_ad(head, x, target="cce"):
    """
    Computes elu_grad.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32
        x (tvm.tensor.Tensor): Input of elu

    Returns:
        akg.tvm.Tensor of same type and shape as inputs
    """
    y = elu.elu(x)
    jacs = list(akg.differentiate(y, [x], head))
    return akg.lang.ascend.cast_to(jacs[0], head.dtype)
Example 25
def bernoulli_logprob_ad(head, x, probs):
    """
    An example of differentiating bernoulli.log_prob with respect to all inputs and parameters.

    Args:
        head: The adjoint of the output, i.e., the tensor by which the Jacobians
            will be multiplied
        x: input, a tensor of 0s and 1s
        probs: probabilities of the random variables taking value 1

    """
    mod = bernoulli.bernoulli(probs).log_prob(x)
    auto_diff_outs = list(akg.differentiate(mod, [x, probs], head))
    return auto_diff_outs
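
A call sketch (shapes and dtype are assumptions; x is expected to hold 0/1 values):

probs = akg.tvm.placeholder((128,), "float32", "probs")
x = akg.tvm.placeholder((128,), "float32", "x")
head = akg.tvm.placeholder((128,), "float32", "head")
dx, dprobs = bernoulli_logprob_ad(head, x, probs)  # gradients w.r.t. x and probs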
Example 26
def cos_ad(head, a, target="cce"):
    """
    Computes cosine derivative value of a tensor.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32
        a (tvm.tensor.Tensor): Tensor of type float16, float32

    Returns:
        akg.tvm.Tensor of same type and shape as inputs
    """
    b, attr = cos.cos(a)
    jacs = list(akg.differentiate(b, [a], head))
    return jacs[0], attr
Example 27
def avgpool_ad_no_custom_diff_manual_schedule(head, data, kernel, stride, pad):
    """automatic differentiate of avgpool with manual schedule."""
    attrs = {
        "enable_post_poly_loop_partition": False,
        "enable_pre_poly_loop_partition": False
    }
    avgpool_fwd, _ = avgpool.avgpool(data, kernel, stride, pad)
    [dl_ddata] = akg.differentiate(avgpool_fwd, [data], head)
    # schedule for the differentiation operation
    s = akg.tvm.create_schedule([dl_ddata.op])

    kh, kw = kernel
    shape = get_shape(data)
    ib, ic1, ih, iw, ic0 = shape

    if kh == ih and kw == iw:
        pad2d_input_2_grad = dl_ddata
        res_value_res_grad = pad2d_input_2_grad.op.input_tensors[0]
        head = res_value_res_grad.op.input_tensors[0]

        def comp_func(s):
            head_ub = s.cache_read(head, "local.UB", [res_value_res_grad])
            result_ub = s.cache_write(pad2d_input_2_grad, "local.UB")

            s[res_value_res_grad].set_scope("local.UB")

            b, c1, h, w, c0 = pad2d_input_2_grad.op.axis
            s[head_ub].compute_at(s[pad2d_input_2_grad], b)
            s[res_value_res_grad].compute_at(s[pad2d_input_2_grad], b)
            s[result_ub].compute_at(s[pad2d_input_2_grad], b)
    else:
        pad2d_input_2_grad = dl_ddata
        Broadcast_jac = pad2d_input_2_grad.op.input_tensors[0]
        res_value_res_grad = Broadcast_jac.op.input_tensors[0]
        head = res_value_res_grad.op.input_tensors[0]

        def comp_func(s):
            head_ub = s.cache_read(head, "local.UB", [res_value_res_grad])
            result_ub = s.cache_write(pad2d_input_2_grad, "local.UB")

            s[Broadcast_jac].set_scope("local.UB")
            s[res_value_res_grad].set_scope("local.UB")

            b, c1, h, w, c0 = result_ub.op.axis
            s[result_ub].reorder(*result_ub.op.reduce_axis, b, c1, h, w, c0)

            s[Broadcast_jac].compute_at(s[result_ub], b)

    return dl_ddata, comp_func, attrs
Example 28
def normal_diag_KLdiv_ad(head, mean, scale):
    """
    An example of differentiating normal_diag.KL_divergence with respect to all inputs and parameters.

    Args:
        head: The adjoint of the output, i.e., the tensor by which the Jacobians
            will be multiplied
        mean: vector of means of the MVN
        scale: vector of sigmas of the MVN with diagonal covariance

    """
    mod = normal_diag.normal_diag(mean, scale).KL_divergence()
    auto_diff_outs = list(akg.differentiate(mod, [mean, scale], head))
    return auto_diff_outs
Example 29
def reduce_min_ad_optimized(HEAD, data, axis, keepdims):
    def get_shape(pld):
        return [d.value for d in pld.shape]

    def grad_compute(grad, *indices):
        indices_list = list(indices)
        axis_list = [x + len(indices_list) if x < 0 else x for x in list(axis)]

        if keepdims:
            grad_indices_list = [
                indices_list[i] if i not in axis_list else 0
                for i in range(len(indices_list))
            ]
        else:
            grad_indices_list = [
                indices_list[i] for i in range(len(indices_list))
                if i not in axis_list
            ]

        grad_ind = tuple(grad_indices_list)

        return grad(*grad_ind)

    def custom_reduce_min_fdiff(out, inputs, grad, ad_attrs, new_pld_array):
        data = inputs[0]
        shape = get_shape(data)
        min_ = akg.lang.cce.reduce_min(data, axis=axis, keepdims=keepdims)
        min_broadcast = akg.lang.cce.broadcast(min_, shape)
        return [
            akg.tvm.compute(shape,
                            lambda *indices: akg.tvm.expr.Select(
                                data(*indices) == min_broadcast(*indices),
                                grad_compute(grad, *indices),
                                akg.tvm.const(0, dtype=data.dtype)),
                            name="reduce_min_ad2")
        ]

    L = reduce_min.reduce_min(data, axis, keepdims)

    [dL_ddata
     ] = akg.differentiate(L, [data],
                           HEAD,
                           None,
                           None,
                           override={L: ([data], custom_reduce_min_fdiff)})
    return dL_ddata
Example 30
def mean_ad(head, input_shape, axis, keepdims):
    """
    Compute gradient of mean operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor.
        input_shape (Union[list, tuple]): Shape of input tensor of mean operator.
        axis (Union[list, tuple, int]): Specifies which axis to reduce.
        keepdims (bool): Keep the reduced axis with length 1 if keepdims is true.

    Returns:
        tvm.tensor.Tensor.
    """
    a = akg.tvm.placeholder(input_shape, head.dtype, "A")
    b, _ = mean.mean(a, axis, keepdims)
    jacs = list(akg.differentiate(b, [a], head))
    return jacs[0]
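
A call sketch for this mean_ad variant; head's shape must match mean's output for the chosen axis and keepdims (the values below are assumptions):

head = akg.tvm.placeholder((8, 1), "float32", "head")  # mean over axis 1 with keepdims=True
dx = mean_ad(head, input_shape=(8, 32), axis=1, keepdims=True)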