Example #1
def fused_bn_follow_relu(data0, data1, data2, data3, data4, layout='NHWC', out_dtype='float16', target=utils.CUDA):
    """
    input:
    data0-4: bn parameters for conv2d tensor, length is 5
    data0: param0 beta
    data1: param1 gamma
    data2: param2 BNupdate: xi_variance
    data3: param6 BNreduce: xi_mean
    data4: param7 xi_conv2d, float16
    layout: only (N, H, W, C), (N, C, H, W) supported
    out_dtype: float16

    output:
    ReLU: max(batch-normalized tensor,  0)
    """
    if layout == 'NCHW':
        data4 = topi.transpose(data4, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))

    add0 = fused_bn_follow(data0, data1, data2, data3, data4)
    add0 = topi.cast(add0, out_dtype)
    output = topi.maximum(add0, 0)

    if layout == "NCHW":
        output = topi.transpose(output, (0, 3, 1, 2))

    return output
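A minimal call sketch for fused_bn_follow_relu. The import path, parameter shapes, and dtypes below are illustrative assumptions rather than something stated in the source, and fused_bn_follow must already be in scope as in the module above.

# Illustrative only: import path, shapes and dtypes are assumptions.
from akg import tvm

c = 64
beta = tvm.placeholder((c,), dtype='float32', name='beta')            # data0
gamma = tvm.placeholder((c,), dtype='float32', name='gamma')          # data1
variance = tvm.placeholder((c,), dtype='float32', name='variance')    # data2
mean = tvm.placeholder((c,), dtype='float32', name='mean')            # data3
conv_out = tvm.placeholder((32, 7, 7, c), dtype='float16', name='conv_out')  # data4, NHWC

relu_out = fused_bn_follow_relu(beta, gamma, variance, mean, conv_out,
                                layout='NHWC', out_dtype='float16')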
Example #2
def fused_bn_follow_relu_avgpool(data0, data1, data2, data3, data4, data5, layout='NHWC', out_dtype='float16', target=utils.CUDA):
    """
    input:
    data: length is 6
    data0: tensor1 after bn_double_relu
    data1-5: bn parameters for conv2d tensor2
    layout: only (N, H, W, C), (N, C, H, W) supported
    out_dtype: float16

    output:
    avg-pooling( max(batch-normalized tensor1 + batch-normalized tensor2,  0) )
    """
    if layout == 'NCHW':
        data0 = topi.transpose(data0, (0, 2, 3, 1))
        data5 = topi.transpose(data5, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))

    n, h, w, c = data0.shape
    inter_dtype = 'float32'
    add0 = fused_bn_follow(data1, data2, data3, data4, data5)
    add0 = topi.cast(add0, data0.dtype)
    add1 = topi.add(data0, add0)
    output = topi.maximum(add1, 0)
    output = topi.cast(output, inter_dtype)
    output = topi.sum(output, axis=(1, 2))
    output = topi.divide(output, h * w)
    output = topi.cast(output, out_dtype)

    return output
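A self-contained numpy analogue of the ReLU + global average-pool tail of this function (the fused_bn_follow part is omitted); the shapes are illustrative assumptions.

import numpy as np

# numpy analogue of: maximum(add1, 0) -> cast float32 -> sum over H, W -> divide by h*w -> cast out_dtype
n, h, w, c = 2, 7, 7, 16
add1 = np.random.randn(n, h, w, c).astype(np.float16)           # stands in for data0 + add0
relu = np.maximum(add1, 0).astype(np.float32)
pooled = (relu.sum(axis=(1, 2)) / (h * w)).astype(np.float16)   # shape (n, c)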
Example #3
def fused_bn_reduce_grad(data0,
                         data1,
                         data2,
                         data3,
                         data4,
                         data5,
                         data6,
                         data7,
                         layout='NHWC',
                         out_dtype='float16',
                         target=utils.CUDA):
    """
    input:
    data0-7: tvm.tensor.Tensor
    layout: only (N, H, W, C), (N, C, H, W) supported
    out_dtype: float16

    output:
    tvm.tensor.Tensor, the fused bn-reduce gradient cast to out_dtype
    """
    if layout == 'NCHW':
        data3 = topi.transpose(data3, (0, 2, 3, 1))
        data7 = topi.transpose(data7, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError('Layout not supported {} '.format(layout))

    n, h, w, c = data3.shape
    const = n * h * w
    inter_dtype = 'float32'
    out1 = topi.multiply(data4, data5)
    out1 = topi.divide(out1, const)
    out1 = topi.expand_dims(out1, axis=0, num_newaxis=3)
    out1 = topi.broadcast_to(out1, (n, h, w, c))

    data3 = topi.cast(data3, inter_dtype)
    data2 = topi.expand_dims(data2, axis=0, num_newaxis=3)
    data2 = topi.broadcast_to(data2, (n, h, w, c))
    out2 = topi.multiply(data3, const)
    out2 = topi.subtract(out2, data2)

    data1 = topi.expand_dims(data1, axis=0, num_newaxis=3)
    data1 = topi.broadcast_to(data1, (n, h, w, c))
    data7 = topi.cast(data7, inter_dtype)
    out3 = topi.divide(data6, const)
    out3 = topi.subtract(data7, out3)
    out3 = topi.multiply(data1, out3)
    out3 = topi.divide(out3, data0)

    output = topi.subtract(out2, out3)
    output = topi.multiply(output, out1)

    output = topi.cast(output, out_dtype)

    if layout == "NCHW":
        output = topi.transpose(output, (0, 3, 1, 2))

    return output
Example #4
def fused_bn_reduce(data, layout, out_dtype):
    """
    input:
    data:  4-D Tensor
    layout: input layout, only 'NCHW', 'NHWC' supported
    out_dtype: "float16" or "float32"
    
    output:
    out1_sum: 1-D tensor (C), per-channel sum of the input over N, H, W
    out2_squared_sum: 1-D tensor (C), per-channel sum of the squared input over N, H, W
    """

    if layout == "NCHW":
        data = topi.transpose(data, axes=(0, 2, 3, 1))
    elif layout != "NHWC":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    data_cast = topi.cast(data, inter_dtype)

    out1_sum = topi.sum(data_cast, axis=(0, 1, 2))
    out1_sum = topi.cast(out1_sum, out_dtype)

    squared = topi.multiply(data_cast, data_cast)
    out2_squared_sum = topi.sum(squared, axis=(0, 1, 2))
    out2_squared_sum = topi.cast(out2_squared_sum, out_dtype)

    return [out1_sum, out2_squared_sum]
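A self-contained numpy analogue of fused_bn_reduce for an NHWC input; the shape is an illustrative assumption.

import numpy as np

# numpy analogue: per-channel sum and per-channel sum of squares over N, H, W
data = np.random.randn(2, 4, 4, 8).astype(np.float16)
data_cast = data.astype(np.float32)
out1_sum = data_cast.sum(axis=(0, 1, 2)).astype(np.float16)                        # shape (8,)
out2_squared_sum = (data_cast * data_cast).sum(axis=(0, 1, 2)).astype(np.float16)  # shape (8,)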
Example #5
def fused_relu_grad_bn_double_update_grad(data_1, data_2, data_3, data_4, data_5, data_6, data_7, layout='NHWC'):
    # Reassigning the loop variable would not update the inputs, so transpose each tensor explicitly.
    if layout == "NCHW":
        data_2, data_4, data_5, data_6, data_7 = [
            topi.transpose(t, axes=(0, 2, 3, 1)) for t in (data_2, data_4, data_5, data_6, data_7)]
    elif layout != "NHWC":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    data_tmp1 = topi.full_like(data_7, 0.0)
    data_tmp2 = topi.greater(data_7, data_tmp1)
    data_tmp3 = topi.add(data_5, data_6)
    data_tmp4 = topi.where(data_tmp2, data_tmp3, data_tmp1)
    data_tmp5 = topi.cast(data_tmp4, 'float32')
    data_tmp7 = topi.sum(data_tmp5, axis=(0, 1, 2))

    n, h, w, c = data_7.shape
    data_tmp8 = topi.cast(data_2, 'float32')
    data_tmp9 = topi.full_like(data_tmp7, 1.0/(n*h*w))
    data_tmp10 = topi.multiply(data_1, data_tmp9)
    data_tmp11 = topi.broadcast_to(data_tmp10, data_tmp8.shape)
    data_tmp12 = topi.subtract(data_tmp8, data_tmp11)
    data_tmp13 = topi.multiply(data_tmp5, data_tmp12)
    data_tmp15 = topi.sum(data_tmp13, axis=(0, 1, 2))

    data_tmp16 = topi.cast(data_4, 'float32')
    data_tmp17 = topi.multiply(data_3, data_tmp9)
    data_tmp18 = topi.broadcast_to(data_tmp17, data_tmp16.shape)
    data_tmp19 = topi.subtract(data_tmp16, data_tmp18)
    data_tmp20 = topi.multiply(data_tmp5, data_tmp19)
    data_tmp22 = topi.sum(data_tmp20, axis=(0, 1, 2))

    return [data_tmp7, data_tmp15, data_tmp22]
Example #6
def fused_pad(input,
              pad_before,
              pad_after,
              layout='NHWC',
              pad_value=0.0,
              target=utils.CUDA):
    """
    fused_pad.
 
    Args:
        input : tvm.Tensor or Expr
        pad_before : list / tuple of n ints. (Pad width on each dimension, added before the axis begins.)
        pad_after : list / tuple of n ints. (Pad width on each dimension, added after the axis ends.)
        pad_value : float. (The value to pad with.)

    Returns
        tvm.Tensor
    """
    if layout == "NCHW":
        input = topi.transpose(input, axes=(0, 2, 3, 1))
    elif layout != "NHWC":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    cast_after = topi.cast(input, 'float16')
    output = topi.nn.pad(cast_after, pad_before, pad_after, pad_value)
    return output
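A minimal call sketch for fused_pad; the import path and shape are illustrative assumptions, not from the source.

# Illustrative only: import path and shape are assumptions.
from akg import tvm

x = tvm.placeholder((1, 7, 7, 16), dtype='float32', name='x')
# pad H and W by one element on each side; leave N and C untouched
padded = fused_pad(x, pad_before=(0, 1, 1, 0), pad_after=(0, 1, 1, 0),
                   layout='NHWC', pad_value=0.0)
# padded has shape (1, 9, 9, 16) and dtype float16, since the op casts before padding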
Example #7
def auto_in_transpose(data, layout="NHDT"):
    layout_int = layout.replace('N', '0').replace(
        'H', '1').replace('D', '2').replace('T', '3')
    layout_list = list(layout_int)
    layout_axis = np.argsort(layout_list)
    data = topi.transpose(data, axes=tuple(layout_axis))
    return data
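A short, self-contained demonstration of the digit/argsort trick used above (and again in auto_out_transpose in Example #10): each layout letter is mapped to its canonical position, and argsort of those digits yields the transpose axes that bring the data into NHDT order. The layout string and shape are illustrative.

import numpy as np

layout = "HNDT"                      # example input layout
digits = list(layout.replace('N', '0').replace('H', '1')
                    .replace('D', '2').replace('T', '3'))   # ['1', '0', '2', '3']
axes = tuple(int(a) for a in np.argsort(digits))            # (1, 0, 2, 3)

x = np.empty((5, 4, 3, 2))           # H=5, N=4, D=3, T=2 in "HNDT" order
print(np.transpose(x, axes).shape)   # (4, 5, 3, 2), i.e. N, H, D, T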
Example #8
def fused_l2loss_grad(data_f16,
                      data_f32,
                      layout='NHWC',
                      fill_data=4e-05,
                      target=utils.CUDA):
    """
    fused_l2loss_grad.

    Args:
        data_f16, data_f32: tvm.tensor.Tensor.
        layout: input layout of data_f16, only 'NCHW', 'NHWC' supported.
        fill_data: float, scale applied to data_f32.

    Returns:
        tvm.tensor.Tensor (float32): data_f16 + fill_data * data_f32.
    """
    if layout == "NCHW":
        data_f16 = topi.transpose(data_f16, axes=(0, 2, 3, 1))
    elif layout != "NHWC":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    data_f16 = topi.cast(data_f16, 'float32')
    constant_tmp = topi.cast(fill_data, 'float32')
    data_constant = topi.full_like(data_f32, constant_tmp)
    data_out = topi.multiply(data_constant, data_f32)
    data_out = topi.add(data_f16, data_out)

    return data_out
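A self-contained numpy analogue of the computation above; shapes are illustrative assumptions.

import numpy as np

# numpy analogue: cast data_f16 to float32, then add fill_data * data_f32
data_f16 = np.random.randn(2, 4, 4, 8).astype(np.float16)
data_f32 = np.random.randn(2, 4, 4, 8).astype(np.float32)
data_out = data_f16.astype(np.float32) + 4e-05 * data_f32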
Example #9
def bn_beta_grad_np(head, layout='NHWC'):
    if layout == 'NCHW':
        head = np.transpose(head, (0, 2, 3, 1))
    elif layout != "NHWC":
        raise NotImplementedError('layout is not supported {} '.format(layout))

    bn_beta_grad = np.sum(head, axis=(0, 1, 2))
    return bn_beta_grad
Example #10
def auto_out_transpose(expect, layout_out="NHDT"):
    if len(expect.shape) == 3:
        layout_out = layout_out[1:]
    if len(expect.shape) == 2:
        layout_out = layout_out[2:]
    layout_out_int = layout_out.replace('N', '0').replace('H', '1').replace('D', '2').replace('T', '3')
    layout_out_list = list(layout_out_int)
    layout_out_axis = np.argsort(layout_out_list)
    expect = topi.transpose(expect, axes=tuple(layout_out_axis))
    return expect
Example #11
def fused_bn_double_follow_relu(data0,
                                data1,
                                data2,
                                data3,
                                data4,
                                data5,
                                data6,
                                data7,
                                data8,
                                data9,
                                layout='NHWC',
                                out_dtype='float16',
                                target=utils.CUDA):
    """
    input:
    data: length is 10
    data0-4: bn parameters for conv2d tensor 1
    data5-9: bn parameters for conv2d tensor 2
    layout: only (N, H, W, C), (N, C, H, W) supported
    out_dtype: float16

    output:
    ReLU: max(batch-normalized tensor1 + batch-normalized tensor2,  0)
    """

    if layout == 'NCHW':
        data4 = topi.transpose(data4, (0, 2, 3, 1))
        data9 = topi.transpose(data9, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError('Layout not supported {} '.format(layout))

    add0 = fused_bn_follow(data0, data1, data2, data3, data4)
    add1 = fused_bn_follow(data5, data6, data7, data8, data9)
    add0 = topi.cast(add0, out_dtype)
    add1 = topi.cast(add1, out_dtype)
    add2 = topi.add(add0, add1)
    output = topi.maximum(add2, 0)

    if layout == "NCHW":
        output = topi.transpose(output, (0, 3, 1, 2))

    return output
Example #12
def fused_relu_grad_bn_reduce_grad(data_1,
                                   data_2,
                                   data_3,
                                   data_4,
                                   data_5,
                                   data_6,
                                   data_7,
                                   data_8,
                                   data_9,
                                   layout='NHWC',
                                   target=utils.CUDA):
    """
    fused_relu_grad_bn_reduce_grad.

    Args:
        data_1~data_9: tvm.tensor.Tensor.
        layout: input layout, only 'NCHW', 'NHWC' supported

    Returns:
        tvm.tensor.Tensor.
    """
    # Reassigning the loop variable would not update the inputs, so transpose each tensor explicitly.
    if layout == "NCHW":
        data_7, data_8, data_9 = [
            topi.transpose(t, axes=(0, 2, 3, 1)) for t in (data_7, data_8, data_9)]
    elif layout != "NHWC":
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))

    data_tmp1 = topi.multiply(data_4, data_5)
    n, h, w, c = data_9.shape
    data_tmp2 = topi.full_like(data_tmp1, 1.0 / (n * h * w))
    data_tmp3 = topi.multiply(data_tmp1, data_tmp2)

    data_tmp5 = topi.full_like(data_9, 0.0)
    data_tmp6 = topi.greater(data_9, data_tmp5)

    data_tmp7 = topi.where(data_tmp6, data_8, data_tmp5)

    data_tmp8 = topi.cast(data_tmp7, 'float32')
    data_tmp9 = topi.full_like(data_tmp8, n * h * w)
    data_tmp10 = topi.multiply(data_tmp8, data_tmp9)
    data_tmp12 = topi.subtract(data_tmp10, data_3)
    data_tmp14 = topi.cast(data_7, 'float32')
    data_tmp15 = topi.multiply(data_6, data_tmp2)

    data_tmp17 = topi.subtract(data_tmp14, data_tmp15)
    data_tmp18 = topi.multiply(data_2, data_tmp17)
    data_tmp20 = topi.divide(data_tmp18, data_1)
    data_tmp21 = topi.subtract(data_tmp12, data_tmp20)
    data_tmp22 = topi.multiply(data_tmp3, data_tmp21)
    data_out = topi.cast(data_tmp22, 'float16')

    return data_out
Example #13
def bn_gamma_grad_np(head, in_data, data_sum, layout='NHWC'):
    if layout == 'NCHW':
        head = np.transpose(head, (0, 2, 3, 1))
    elif layout != "NHWC":
        raise NotImplementedError('layout is not supported {} '.format(layout))

    n, h, w, c = head.shape
    mean = np.divide(data_sum, n * h * w)
    x_hat = np.subtract(in_data, mean)
    x_hat_mul = np.multiply(x_hat, head)
    bn_gamma_grad = np.sum(x_hat_mul, axis=(0, 1, 2))
    return bn_gamma_grad
Example #14
def fused_relu_grad_bn_double_reduce_grad(data0, data1, data2, data3, data4, data5, data6, data7, data8,
                           data9, data10, data11, data12, data13, data14, data15, layout="NHWC",
                           out_dtype="float16", target=utils.CUDA):
    """
    input:
    data0-15: tvm.tensor.Tensor
    layout: only (N, H, W, C), (N, C, H, W) supported
    out_dtype: float16

    output:
    list of two tvm.tensor.Tensor, one per branch, cast to out_dtype
    """
    if layout == 'NCHW':
        data5 = topi.transpose(data5, (0, 2, 3, 1))
        data9 = topi.transpose(data9, (0, 2, 3, 1))
        data13 = topi.transpose(data13, (0, 2, 3, 1))
        data14 = topi.transpose(data14, (0, 2, 3, 1))
        data15 = topi.transpose(data15, (0, 2, 3, 1))
    elif layout != 'NHWC':
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))
    
    inter_dtype = "float32"
    n, h, w, c = data5.shape
    scale = n * h * w

    mul = topi.multiply(data2, data3)
    mul1221 = topi.divide(mul, scale)

    # ReluGrad
    zero = tvm.const(0, data15.dtype)
    add = topi.add(data13, data14)
    addgrad = tvm.compute(add.shape, lambda *i: tvm.if_then_else(data15(*i) >= zero, add(*i), zero), tag=tag.INJECTIVE)
    addgrad = topi.cast(addgrad, inter_dtype)
    mul3283 = topi.multiply(scale, addgrad)
    sub1159 = topi.subtract(mul3283, data6)

    data5_cast = topi.cast(data5, inter_dtype)
    mul2372 = topi.divide(data4, scale)
    sub631 = topi.subtract(data5_cast, mul2372)
    mul1220 = topi.multiply(sub631, data1)
    div = topi.divide(mul1220, data0)
    sub271 = topi.subtract(sub1159, div)
    mul1218 = topi.multiply(mul1221, sub271)
    mul1218_cast = topi.cast(mul1218, out_dtype)

    mul1231 = topi.multiply(data11, data12)
    mul1230 = topi.divide(mul1231, scale)
    data9_cast = topi.cast(data9, inter_dtype)
    mul2364 = topi.divide(data8, scale)
    sub625 = topi.subtract(data9_cast, mul2364)
    mul1229 = topi.multiply(data10, sub625)

    div272 = topi.divide(mul1229, data7)
    sub272 = topi.subtract(sub1159, div272)
    mul1228 = topi.multiply(mul1230, sub272)
    mul1228_cast = topi.cast(mul1228, out_dtype)

    if layout == "NCHW":
        mul1218_cast = topi.transpose(mul1218_cast, (0, 3, 1, 2))
        mul1228_cast = topi.transpose(mul1228_cast, (0, 3, 1, 2))
    
    return [mul1218_cast, mul1228_cast]
Example #15
def fused_is_finite(data, layout='NHWC'):
    """
    fused_is_finite.

    Args:
        data: tvm.tensor.Tensor.
        layout: input layout, only 'NCHW', 'NHWC' supported.

    Returns:
        scalar boolean tensor, True only if every element of data is finite.
    """
    if layout == "NCHW":
        data = topi.transpose(data, axes=(0, 2, 3, 1))
    elif layout != "NHWC":
        raise NotImplementedError('Layout not supported {} '.format(layout))
    data_isfinite = topi.isfinite(data)
    n, h, w, c = data_isfinite.shape
    data_out = topi.all(data_isfinite, axis=(0, 1, 2, 3))
    return data_out
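A short numpy analogue of fused_is_finite on an illustrative array: the result is a single boolean that is True only when every element is finite.

import numpy as np

data = np.array([[[[1.0, np.inf], [0.5, np.nan]]]], dtype=np.float16)   # shape (1, 1, 2, 2)
print(np.isfinite(data).all())   # False: the inf and nan entries fail the check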