Example #1
def import_get_func(self, func, mode):
    """
    Import a run function from the test_run package,
    e.g. ``from test_run.tile_run import tile_compile``.

    :param func: function name
    :param mode: case mode
    :return: the resolved run function
    """
    run_mod = "tests.common.test_run"
    func_py = None
    try:
        backend_mod = run_mod + "." + utils.get_backend(
            self.target).lower()
        func_mod = backend_mod + "." + func
        func_py = __import__(func_mod, fromlist=func)
    except ModuleNotFoundError:
        func_mod = run_mod + "." + func
        func_py = __import__(func_mod, fromlist=func)
    if func_py is None:
        raise ImportError("import %s fail, please check!" % func_mod)
    try:
        run_func = getattr(func_py, func)
    except (ImportError, AttributeError):
        new_func = func.split("_run")[0] + "_" + mode
        run_func = getattr(func_py, new_func)
    return run_func
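
A minimal usage sketch, assuming the function above is in scope at module level and that the surrounding test framework (including `utils`) is importable. The class name `CaseRunner`, the backend string "cce", and the function/mode names "abs_run" and "compile" are placeholders, not taken from the original framework.

# Hypothetical sketch: bind the function above as a method of a small runner class.
class CaseRunner:
    target = "cce"                      # placeholder backend identifier
    import_get_func = import_get_func   # reuse the function defined above

runner = CaseRunner()
# Tries tests.common.test_run.<backend>.abs_run first, then tests.common.test_run.abs_run.
run_func = runner.import_get_func("abs_run", "compile")
result = run_func()  # the arguments depend on the resolved run function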
Example #2
def sum_by_shape(broadcast_data, original_shape, target=utils.CCE):
    """
    Sum broadcast_data back to original_shape; this is the gradient of a broadcast.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' %
                           utils.get_backend(target))

    broadcast_shape = get_shape(broadcast_data)
    original_shape = get_shape(original_shape)
    if broadcast_shape == original_shape:
        return broadcast_data
    if original_shape == [1]:
        data = sum(broadcast_data, target=target)
        return data

    utils.broadcast_check(original_shape, broadcast_shape)
    axis_len = len(broadcast_shape) - len(original_shape)
    if axis_len > 0:
        axis = list(range(axis_len))
        broadcast_data = sum(broadcast_data, axis, False, target=target)
        broadcast_shape = get_shape(broadcast_data)

    axis = []
    for i, _ in enumerate(original_shape):
        if original_shape[i] != broadcast_shape[i]:
            axis.append(i)
    res = sum(broadcast_data, axis, True,
              target=target)[0] if axis else broadcast_data
    return res
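
A minimal usage sketch for sum_by_shape, assuming akg is importable and the function above is in scope; the shapes and dtype are illustrative.

import akg

# Hypothetical gradient that was broadcast from shape [1, 16] up to [8, 16].
grad = akg.tvm.placeholder((8, 16), name="grad", dtype="float16")
reduced = sum_by_shape(grad, [1, 16])  # summed back to the original shape [1, 16]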
Example #3
def sum_v2(inputs, axis=None, keepdims=True, target=utils.CCE):
    """
    Another implementation of sum, built on the topi API.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' %
                           utils.get_backend(target))

    dtype = inputs.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    axis = ft_util.refine_reduce_axis(inputs, axis)
    utils.check_shape(inputs.shape)
    if not axis:
        output = akg.topi.identity(inputs)
    else:
        if dtype == "float16":
            step_sum = Cast(inputs, "float32", target)
        else:
            step_sum = inputs

        step_sum = akg.topi.sum(step_sum, axis=axis, keepdims=keepdims)

        if dtype == "float16":
            output = Cast(step_sum, "float16", target)
        else:
            output = step_sum
    return output
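
A minimal usage sketch for sum_v2, assuming akg is importable and the function above is in scope; the shape, dtype, and axis are illustrative.

import akg

data = akg.tvm.placeholder((32, 64), name="data", dtype="float16")
out = sum_v2(data, axis=1, keepdims=True)  # reduces axis 1; result shape is (32, 1)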
Example #4
def prod_force_se_a(net_deriv_tensor,
                    in_deriv_tensor,
                    nlist_tensor,
                    natoms=192,
                    target=utils.CCE):
    """
    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' %
                           utils.get_backend(target))
    net_deriv_tensor_shape = net_deriv_tensor.shape
    nlist_tensor_shape = nlist_tensor.shape
    natoms = net_deriv_tensor_shape[1]  # overrides the natoms argument with the actual tensor dim
    nframes = net_deriv_tensor_shape[0]
    ndescript = net_deriv_tensor_shape[2]
    nnei = nlist_tensor_shape[2]

    output_shape = [nframes, natoms, 3]

    @script
    def prod_force_se_a_compute(net_deriv_tensor, in_deriv_tensor,
                                nlist_tensor):
        force = output_tensor(output_shape, dtype=net_deriv_tensor.dtype)
        for kk in range(nframes):
            for ii in range(natoms):
                for cc in range(3):
                    force[kk, ii, cc] = 0.0
            for ii in range(natoms):
                for aa in range(ndescript):
                    for cc in range(3):
                        force[kk, ii, cc] -= net_deriv_tensor[
                            kk, ii, aa] * in_deriv_tensor[kk, ii, aa, cc]
                for jj in range(nnei):
                    j_idx = nlist_tensor[kk, ii, jj]
                    if j_idx > -1:
                        for aa in range(jj * 4, jj * 4 + 4):
                            for cc in range(3):
                                force[kk, j_idx, cc] += net_deriv_tensor[
                                    kk, ii, aa] * in_deriv_tensor[kk, ii, aa,
                                                                  cc]
        return force

    output = prod_force_se_a_compute(net_deriv_tensor, in_deriv_tensor,
                                     nlist_tensor)
    attrs = {
        'enable_post_poly_loop_partition': False,
        'enable_double_buffer': False,
        'enable_feature_library': True,
        'RewriteVarTensorIdx': True
    }
    return output, attrs
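
A minimal usage sketch for prod_force_se_a, assuming akg is importable and the function above is in scope; all shapes are illustrative, with nnei = 138 and ndescript = 4 * nnei = 552.

import akg

net_deriv = akg.tvm.placeholder((1, 192, 552), name="net_deriv", dtype="float32")
in_deriv = akg.tvm.placeholder((1, 192, 552, 3), name="in_deriv", dtype="float32")
nlist = akg.tvm.placeholder((1, 192, 138), name="nlist", dtype="int32")
force, attrs = prod_force_se_a(net_deriv, in_deriv, nlist)  # force shape: (1, 192, 3)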
Example #5
def floor(data, target=utils.CCE):
    """
    Returns the element-wise largest integer not greater than the input.

    Args:
        data (akg.tvm.tensor.Tensor): Tensor of type float16 or float32.

    Returns:
        akg.tvm.tensor.Tensor, with the same shape as data and dtype int32.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' %
                           utils.get_backend(target))
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    utils.check_shape(shape)

    if product_is_mini() and data.dtype == "float32":
        # Fix cases like floor(86.9996) == 87 when high precision is needed.
        # The error comes from the fp16 cast, e.g. fp16(86.9996) == 87.
        # Detect it via fp32(86.9996) - fp32(floor(fp16(86.9996))) < 0.

        # On mini, floor can only be applied to float16.
        data_fp16 = akg.lang.ascend.cast_to(data, "float16")
        floor_data = akg.lang.ascend.floor(data_fp16)
        floor_fp16 = akg.lang.ascend.cast_to(floor_data, "float16")
        floor_fp32 = akg.lang.ascend.cast(floor_fp16, "float32")

        # If diff = 1e-7, the sign can be lost when casting diff to fp16,
        # but 10000 * diff = 1e-3 survives the cast and has the same sign as diff.
        diff = (data - floor_fp32) * 10000
        diff_fp16 = akg.lang.ascend.cast_to(diff, "float16")

        # diff < 0 means the fp16 cast rounded the input up (e.g. floor(86.99999)
        # produced 87), so correct the result by subtracting 1.
        res = akg.tvm.compute(
            shape,
            lambda *i: akg.tvm.expr.Select(
                diff_fp16(*i) < akg.tvm.const(0, "float16"),
                floor_fp16(*i) - akg.tvm.const(1, "float16"), floor_fp16(*i)),
            name="res")

        res = akg.lang.ascend.cast_to(res, "int32")
    else:
        res = akg.lang.ascend.floor(data)

    return res
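
A minimal usage sketch for floor, assuming akg is importable and the function above is in scope; the shape and dtype are illustrative.

import akg

data = akg.tvm.placeholder((16, 16), name="data", dtype="float16")
res = floor(data)  # int32 tensor with the same shape as data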
Example #6
def prod_force_se_a_grad(grad_tensor, in_deriv_tensor, nlist_tensor, natoms=192, target=utils.CCE):
    """
    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' % utils.get_backend(target))
    net_deriv_tensor_shape = grad_tensor.shape
    natoms = net_deriv_tensor_shape[1]  # overrides the natoms argument with the actual tensor dim
    nframes = net_deriv_tensor_shape[0]
    nnei = nlist_tensor.shape[2]
    ndescript = nnei * 4
    output_shape = [nframes, natoms, ndescript]

    @script
    def prod_force_se_a_grad_compute(grad_tensor, in_deriv_tensor, nlist_tensor):
        grad_net = output_tensor(output_shape, dtype=grad_tensor.dtype)

        for kk in range(nframes):
            for ii in range(natoms):
                for jj in range(nnei):
                    for aa in range(jj*4, jj*4+4):
                        grad_net[kk, ii, aa] = 0.0
                        for cc in range(3):
                            grad_net[kk, ii, aa] -= grad_tensor[kk, ii, cc] * in_deriv_tensor[kk, ii, aa, cc]
                            j_idx = nlist_tensor[kk, ii, jj]
                            if j_idx > -1:
                                grad_net[kk, ii, aa] += grad_tensor[kk, j_idx, cc] * in_deriv_tensor[kk, ii, aa, cc]
        return grad_net

    output = prod_force_se_a_grad_compute(grad_tensor, in_deriv_tensor, nlist_tensor)
    attrs = {'enable_post_poly_loop_partition': False,
             'enable_double_buffer': False,
             'enable_cover_protect_optimize': False,
             'enable_feature_library': True,
             'RewriteVarTensorIdx': False}
    if nframes.value > 1:
        attrs['dim'] = "0 0 1 1 0 1 192 1 0 2 1 1 0 3 1 1"
    else:
        attrs['dim'] = "0 0 192 1 0 1 1 1 0 2 1 1 0 3 1 1"

    return output, attrs
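
A minimal usage sketch for prod_force_se_a_grad, assuming akg is importable and the function above is in scope; the shapes match the Example #4 sketch (nnei = 138, so ndescript = 552) and are illustrative.

import akg

grad = akg.tvm.placeholder((1, 192, 3), name="grad", dtype="float32")
in_deriv = akg.tvm.placeholder((1, 192, 552, 3), name="in_deriv", dtype="float32")
nlist = akg.tvm.placeholder((1, 192, 138), name="nlist", dtype="int32")
grad_net, attrs = prod_force_se_a_grad(grad, in_deriv, nlist)  # grad_net shape: (1, 192, 552)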