Example #1
def reduce_max_ad_optimized(head, data, axis, keepdims):
    def get_shape(pld):
        return [d.value for d in pld.shape]

    def custom_reduce_max_fdiff(out, inputs, grad, ad_attrs, new_pld_array):
        data = inputs[0]
        shape = get_shape(data)
        # forward reduction, then broadcast the maxima back to the input shape
        max_ = akg.lang.cce.reduce_max(data, axis=axis, keepdims=keepdims)
        max_broadcast = akg.lang.cce.broadcast(max_, shape)
        # propagate the adjoint only to positions that hold the maximum; zero elsewhere
        return [
            akg.tvm.compute(shape,
                            lambda *indices: akg.tvm.expr.Select(
                                data(*indices) == max_broadcast(*indices),
                                grad(*get_reduced_indices(
                                    *indices, axis=axis, keepdims=keepdims)),
                                akg.tvm.const(0, dtype=data.dtype)),
                            name="reduce_max_ad2")
        ]

    l = reduce_max.reduce_max(data, axis, keepdims)

    # differentiate the forward op, overriding its gradient with the custom rule above
    [dl_ddata] = akg.differentiate(l, [data],
                                   head,
                                   None,
                                   None,
                                   override={l: ([data], custom_reduce_max_fdiff)})
    return dl_ddata
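
A minimal usage sketch for the override-based differentiation above, assuming the same `akg` and `reduce_max` module imports as the surrounding snippet; the shapes and names here are illustrative only:

import akg

data = akg.tvm.placeholder((8, 16), "float16", name="data")
out = reduce_max.reduce_max(data, 1, True)                           # forward reduce_max, shape (8, 1)
head = akg.tvm.placeholder(out.shape, dtype=out.dtype, name="head")  # adjoint of the output
dl_ddata = reduce_max_ad_optimized(head, data, 1, True)              # gradient w.r.t. the input
s = akg.tvm.create_schedule([dl_ddata.op])                           # schedule and build as usual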
Example #2
    def softmax_cross_entropy_with_logits(labels,
                                          logits,
                                          axis,
                                          reduction="mean",
                                          scale=1.0):
        # subtract the per-row max from the logits for numerical stability before exp
        max_logits = reduce_max(logits, axis, keepdims=True, target=utils.CCE)
        data_sub = sub(logits, max_logits, target=utils.CCE)
        akg.register_variables("minus_max", [logits], data_sub)
        data_exp = Exp(data_sub, target=utils.CCE)
        data_expsum = sum(data_exp, axis, keepdims=True, target=utils.CCE)
        data_expsum_log = log(data_expsum, target=utils.CCE)
        sub_value = sub(data_sub, data_expsum_log, target=utils.CCE)
        neg_labels = neg(labels, target=utils.CCE)
        cross_entropy = mul(neg_labels, sub_value, target=utils.CCE)
        # backprop: prob - labels, where prob = softmax(logits)
        prob = Exp(sub_value, target=utils.CCE)
        backprop = sub(prob, labels, target=utils.CCE)

        if reduction.lower() == "none":
            loss = sum_v2(cross_entropy, axis, keepdims=True)
        elif reduction.lower() == "mean":
            loss = sum_v2(cross_entropy, axis=None)
            factor = logits.shape[0].value
            loss = loss * akg.tvm.const(1 / factor, logits.dtype)
            backprop = backprop * akg.tvm.const(1 / factor, logits.dtype)
        elif reduction.lower() == "sum":
            loss = sum_v2(cross_entropy, axis=None)
        else:
            raise ValueError(
                "reduction method {0} is not supported".format(reduction))
        backprop = akg.topi.multiply(backprop,
                                     akg.tvm.const(scale, backprop.dtype))
        return loss, backprop
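
For intuition, a rough NumPy reference of the same math using the usual log-sum-exp stabilization, ignoring the reduction and scale options (illustrative only, not the AKG op):

import numpy as np

def softmax_xent_ref(labels, logits, axis=-1):
    x = logits - logits.max(axis=axis, keepdims=True)                 # subtract max for stability
    log_prob = x - np.log(np.exp(x).sum(axis=axis, keepdims=True))    # log-softmax
    loss = -(labels * log_prob).sum(axis=axis)                        # per-sample cross entropy
    backprop = np.exp(log_prob) - labels                              # d(loss)/d(logits) = prob - labels
    return loss, backprop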
Example #3
def segment_max(data, segment_ids, num_segments):
    """
    Computes the max value along segment_ids of a akg.tvm.tensor

    Args:
        data: akg.tvm.Tensor of type "float16", "float32" 
        segment_ids: akg.tvm.Tensor of type int32, sorted

    Returns:
        akg.tvm.Tensor of same shape and type as data

    """

    d_dtype = data.dtype
    vc_util.ops_dtype_check(d_dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    d_shape = [x.value for x in data.shape]
    vc_util.check_shape(d_shape)

    s_shape = segment_ids.shape
    vc_util.check_shape(s_shape)

    new_segment_ids, idx = gen_ids(segment_ids)

    output_shape = (1, ) + tuple(d_shape[len(s_shape):])
    zero_data = akg.tvm.compute(output_shape,
                                lambda *i: akg.tvm.const(0.0, d_dtype),
                                name="zero")

    data_list = split.split(data, new_segment_ids)
    out_n = num_segments

    out = []
    j = 0
    for i in range(0, out_n):
        # segments that actually occur in segment_ids get a reduction;
        # empty segments are filled with zeros
        if i in idx:
            tmp = reduce_max.reduce_max(data_list[j], 0, True)
            out.append(tmp)
            j = j + 1
        else:
            out.append(zero_data)

    res = concat.concat(out, 0)

    return res
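
A rough NumPy sketch of the segment-max semantics implemented above, with the assumption (as in the code) that segments absent from `segment_ids` come out as zeros:

import numpy as np

data = np.array([[1., 2.], [5., 0.], [3., 4.]], dtype=np.float32)
segment_ids = np.array([0, 0, 1], dtype=np.int32)      # sorted, one id per leading row
num_segments = 3
out = np.zeros((num_segments,) + data.shape[1:], dtype=data.dtype)
for seg in range(num_segments):
    rows = data[segment_ids == seg]
    if rows.size:                                       # empty segments keep their zeros
        out[seg] = rows.max(axis=0)
# out -> [[5., 2.], [3., 4.], [0., 0.]]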
Example #4
def unsorted_segment_max(data, segment_ids, num_segments):
    """
    Computes the max value along segment_ids of a akg.tvm.Tensor

    Args:
        data: akg.tvm.Tensor of type float16, float32
        segment_ids: akg.tvm.Tensor of type int32, shape is a prefix of input_data.shape.
        num_segments: the number of classes in segment_ids

    Returns:
        akg.tvm.Tensor of same type as input_data,
    """

    d_dtype = data.dtype
    vc_util.ops_dtype_check(d_dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    d_shape = [x.value for x in data.shape]
    vc_util.check_shape(d_shape)

    s_shape = segment_ids.shape
    vc_util.check_shape(s_shape)

    new_segment_ids, idx = gen_ids(segment_ids)

    output_shape = (1, ) + tuple(d_shape[len(s_shape):])

    zero_data = akg.tvm.compute(output_shape,
                                lambda *i: akg.tvm.const(0.0, d_dtype),
                                name="zero")

    data_list, new_idx = split_new(data, new_segment_ids, idx, num_segments)

    out = []
    j = 0
    for i in range(0, num_segments):
        if i in new_idx:
            tmp = reduce_max.reduce_max(data_list[j], 0, True)
            out.append(tmp)
            j = j + 1
        else:
            out.append(zero_data)

    res = concat.concat(out, 0)
    return res
Example #5
def focal_loss(prediction, target, gamma):
    """
    Calculate loss with focal loss.
    
    See Source: <a href="https://arxiv.org/abs/1708.02002">Focal Loss for Dense Object Detection;
                Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, Piotr Dollár</a>
    
    This op fuses activation function (`softmax`) and loss function (`focalloss`) together.
    
    .. math::
        p = softmax(x) \\
        FL(p) = -(1-p)^{\\gamma}log(p)
    
    Args:
        prediction (tvm.tensor.Tensor): The predicted logits for each class;
            type is float32 or float16 and shape is `(batch_size, num_anchors, num_classes)`.
        target (tvm.tensor.Tensor): The one-hot encoded classification targets;
            type is float32, float16 or int32 and shape is `(batch_size, num_anchors, num_classes)`.
        gamma (float): positive float number.
    
    Returns:
        tvm.tensor.Tensor, has the same type as inputs with shape `(batch_size, num_anchors)`.
    """

    vc_util.check_shape(prediction, length=3, tensor_name="prediction")
    vc_util.check_shape(target, length=3, tensor_name="target")
    vc_util.ops_dtype_check(prediction.dtype,
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.ops_dtype_check(
        target.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    vc_util.check_greater("gamma", "zero", gamma, 0)

    dim_info, _ = focal_loss_set_dim_func(prediction, target)
    attrs = {"dim": dim_info}

    dtype = prediction.dtype

    if utils.product_is_mini() and dtype == 'float32':
        prediction = akg.topi.cast(prediction, "float16")
        target = akg.topi.cast(target, "float16")

    axis = -1
    shape = get_shape(prediction)

    maxv = reduce_max(prediction, axis=axis, keepdims=True)

    k1 = akg.tvm.reduce_axis((0, shape[-1]), name="k1")
    expsum = akg.tvm.compute(
        shape[:-1],
        lambda *i: akg.tvm.sum(akg.tvm.exp(prediction(*i, k1) - maxv(*i, 0)),
                               axis=k1),
        name="expsum")

    gamma = akg.tvm.const(gamma, prediction.dtype)
    one = akg.tvm.const(1, prediction.dtype)

    def cal_focalloss(*i):
        x = prediction(*i) - maxv(*i[:-1], 0)
        pred = akg.tvm.exp(x - akg.tvm.log(expsum(*i[:-1])))  # softmax(x)
        log_p = x - akg.tvm.log(expsum(*i[:-1]))  # logsoftmax(x)
        neg_pred_pow = akg.tvm.exp(akg.tvm.log(one - pred) *
                                   gamma)  # (1-pred)^gamma
        loss = akg.tvm.const(
            -1, prediction.dtype) * target(*i) * neg_pred_pow * log_p
        return loss

    loss = akg.tvm.compute(shape, cal_focalloss, name="loss")

    loss = akg.topi.sum(loss, axis=axis)

    if utils.product_is_mini() and dtype == 'float32':
        loss = akg.topi.cast(loss, "float32")

    return loss, attrs
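
Again for intuition, a rough NumPy reference of the fused softmax + focal loss computed above (illustrative only; the float16 cast handling for mini targets and the dim attributes are omitted):

import numpy as np

def focal_loss_ref(prediction, target, gamma):
    x = prediction - prediction.max(axis=-1, keepdims=True)
    log_p = x - np.log(np.exp(x).sum(axis=-1, keepdims=True))   # log softmax(x)
    p = np.exp(log_p)
    loss = -target * (1.0 - p) ** gamma * log_p                 # FL(p) = -(1-p)^gamma * log(p)
    return loss.sum(axis=-1)                                    # reduce over the class axis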
Example #6
def reduce_max_ad_optimized_manual_schedule(input_shape,
                                            dtype,
                                            axis,
                                            keepdims,
                                            polyhedral=True,
                                            attrs=None):
    def custom_reduce_max_fdiff(out, inputs, head_, ad_attrs, new_pld_array):
        data_ = inputs[0]
        shape = data_.shape
        # reduces maximum value for each column
        max_ = akg.lang.cce.reduce_max(data_, axis=axis, keepdims=True)
        # copies reduced values to get the original shape
        max_broadcast = akg.lang.cce.broadcast(max_, shape)
        # head broadcast is needed to generate correct cce code for the selection operation
        head_broadcast = akg.tvm.compute(
            shape, lambda *indices: head_(*get_reduced_indices(
                *indices, axis=axis, keepdims=keepdims)))
        # zero all the values that are not max values on the result, remaining is equal to the adjoint of the output
        max_values_and_zeros = akg.tvm.compute(
            shape,
            lambda *indices: akg.tvm.expr.Select(
                data_(*indices) == max_broadcast(*indices),
                head_broadcast(*indices), akg.tvm.const(0, dtype='float16')),
            name="reduce_max_ad2")
        # cast data back to the original dtype
        if dtype != 'float16':
            return [cast(max_values_and_zeros, dtype)]
        else:
            return [max_values_and_zeros]

    # tensor for the input data
    data = akg.tvm.placeholder(input_shape, dtype, name="input_data")

    # computation of reduce_max; not used in the schedule,
    # it only serves as the node being differentiated
    l = reduce_max.reduce_max(data, axis, keepdims)

    # adjoint tensor for the differentiation
    head = akg.tvm.placeholder(l.shape, name="head", dtype=l.dtype)

    # cast input data
    if dtype != 'float16':
        data_cast = cast(data, "float16")
        head_cast = cast(head, "float16")
    else:
        data_cast = data
        head_cast = head

    # override differentiation computation with custom function
    [dl_ddata] = akg.differentiate(
        l, [data_cast],
        head_cast,
        None,
        None,
        override={l: ([data_cast], custom_reduce_max_fdiff)})

    # get tensors from custom function
    if dtype != 'float16':
        max_values_and_zeros = dl_ddata.op.input_tensors[0]
        max_broadcast = max_values_and_zeros.op.input_tensors[1]
        max_ = max_broadcast.op.input_tensors[0]
        head_broadcast = max_values_and_zeros.op.input_tensors[2]
    else:
        max_broadcast = dl_ddata.op.input_tensors[1]
        max_ = max_broadcast.op.input_tensors[0]
        head_broadcast = dl_ddata.op.input_tensors[2]

    # schedule for the differentiation operation
    # inputs: data and head
    s = akg.tvm.create_schedule([dl_ddata.op])

    # cache reads of inputs
    if dtype != 'float16':
        head_ub = s.cache_read(head, "local.UB", [head_cast])
        data_ub = s.cache_read(data, "local.UB", [data_cast])
    else:
        # no cast operation
        head_ub = s.cache_read(head_cast, "local.UB", [head_broadcast])
        data_ub = s.cache_read(data_cast, "local.UB", [max_, dl_ddata])

    # cache write for the output
    dl_ddata_ub = s.cache_write(dl_ddata, "local.UB")

    # get tiling attributes
    if attrs is None:
        raise Exception('attrs is None')
    tiling_factors = attrs['tile']
    split_iterators = []
    assert len(tiling_factors) == len(dl_ddata.shape)
    # split the final compute and save the iterators
    for index, factor in enumerate(tiling_factors):
        split_iterators.append(s[dl_ddata].split(dl_ddata.op.axis[index],
                                                 factor))

    # get iterators
    iterator1 = split_iterators[0][0]

    # move the cast computations into the tiled loop when a cast is present
    if dtype != "float16":
        s[data_cast].compute_at(s[dl_ddata], iterator1)
        s[data_cast].set_scope("local.UB")
        s[head_cast].compute_at(s[dl_ddata], iterator1)
        s[head_cast].set_scope("local.UB")
        s[max_values_and_zeros].compute_at(s[dl_ddata], iterator1)
        s[max_values_and_zeros].set_scope("local.UB")

    # move cache reads and writes
    s[data_ub].compute_at(s[dl_ddata], iterator1)
    s[head_ub].compute_at(s[dl_ddata], iterator1)
    s[dl_ddata_ub].compute_at(s[dl_ddata], iterator1)

    # move the computations of the differentiation
    s[max_].compute_at(s[dl_ddata], iterator1)
    s[max_].set_scope("local.UB")
    s[max_broadcast].compute_at(s[dl_ddata], iterator1)
    s[max_broadcast].set_scope("local.UB")
    s[head_broadcast].compute_at(s[dl_ddata], iterator1)
    s[head_broadcast].set_scope("local.UB")

    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [head, data, dl_ddata],
                        "cce",
                        name="reduce_max_ad_manual_schedule",
                        attrs=attrs,
                        polyhedral=polyhedral)
        source_code = mod.imported_modules[0].get_source()
        kernel_name = "reduce_max_ad_manual_schedule"
        utils.create_cce(kernel_name, './', source_code)
    return mod
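
A possible invocation of the manual-schedule variant; the `tile` attribute is assumed to hold one tiling factor per axis of the output tensor, as required by the assertion above (shapes and factors here are illustrative only):

mod = reduce_max_ad_optimized_manual_schedule((32, 64),
                                              "float16",
                                              axis=1,
                                              keepdims=True,
                                              polyhedral=True,
                                              attrs={"tile": [16, 32]})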
Example #7
def reduce_max_ad(head, data, axis, keepdims):
    # plain automatic differentiation of reduce_max, driven by the adjoint `head`
    b = reduce_max.reduce_max(data, axis, keepdims)
    _jacs = akg.differentiate(b, [data], head)
    return _jacs[0]