Example #1
def softmax_cross_entropy_with_logits(labels,
                                      logits,
                                      axis,
                                      reduction="mean",
                                      scale=1.0):
    """Compute the softmax cross-entropy loss and its gradient w.r.t. logits.

    Uses the numerically stable log-softmax (max-subtraction) form:
    log_softmax(x) = (x - max(x)) - log(sum(exp(x - max(x)))).
    """
    max_logits = reduce_max(logits, axis, keepdims=True, target=utils.CCE)
    data_sub = sub(logits, max_logits, target=utils.CCE)
    akg.register_variables("minus_max", [logits], data_sub)
    data_exp = Exp(data_sub, target=utils.CCE)
    data_expsum = sum(data_exp, axis, keepdims=True, target=utils.CCE)
    data_expsum_log = log(data_expsum, target=utils.CCE)
    sub_value = sub(data_sub, data_expsum_log, target=utils.CCE)  # log-softmax
    neg_labels = neg(labels, target=utils.CCE)
    cross_entropy = mul(neg_labels, sub_value, target=utils.CCE)
    # backprop: prob - labels, where prob = softmax(logits)
    prob = Exp(sub_value, target=utils.CCE)
    backprop = sub(prob, labels, target=utils.CCE)

    if reduction.lower() == "none":
        loss = sum_v2(cross_entropy, axis, keepdims=True)
    elif reduction.lower() == "mean":
        loss = sum_v2(cross_entropy, axis=None)
        factor = logits.shape[0].value
        loss = loss * akg.tvm.const(1 / factor, logits.dtype)
        backprop = backprop * akg.tvm.const(1 / factor, logits.dtype)
    elif reduction.lower() == "sum":
        loss = sum_v2(cross_entropy, axis=None)
    else:
        raise ValueError(
            "reduction method {0} is not supported".format(reduction))
    backprop = akg.topi.multiply(backprop,
                                 akg.tvm.const(scale, backprop.dtype))
    return loss, backprop
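As a quick sanity check of the max-subtraction trick used above, here is a minimal NumPy sketch of the same computation for the "mean" reduction (NumPy stands in for the akg/TVM ops; the function name softmax_xent_reference is ours):

import numpy as np

def softmax_xent_reference(labels, logits, axis=-1):
    """Stable log-softmax reference, mirroring the kernel's "mean" branch."""
    shifted = logits - logits.max(axis=axis, keepdims=True)    # minus_max
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))
    loss = -(labels * log_softmax).sum(axis=axis)              # per-sample loss
    backprop = np.exp(log_softmax) - labels                    # prob - labels
    factor = logits.shape[0]
    return loss.sum() / factor, backprop / factor

logits = np.random.randn(4, 10).astype(np.float32)
labels = np.eye(10, dtype=np.float32)[np.random.randint(10, size=4)]
loss, grad = softmax_xent_reference(labels, logits)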
Example #2
def _compute_log(data_input):
    """Compute atanh(x) = 0.5 * log((1 + x) / (1 - x))."""

    data_1_sum_x = topi.add(data_input, dc.one_const(data_input.dtype))         # 1 + x
    data_sub_x = topi.multiply(data_input, dc.neg_one_const(data_input.dtype))  # -x
    data_1_sub_x = topi.add(data_sub_x, dc.one_const(data_input.dtype))         # 1 - x
    data_x_mul = data_1_sum_x / data_1_sub_x                                    # (1 + x) / (1 - x)
    data_x_log = log.log(data_x_mul)
    data_res = topi.multiply(data_x_log, dc.half_const(data_input.dtype))       # 0.5 * log(...)

    return data_res
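The identity in the docstring is easy to verify numerically; a throwaway NumPy check (np.arctanh is the reference):

import numpy as np

x = np.linspace(-0.9, 0.9, 7)
assert np.allclose(0.5 * np.log((1 + x) / (1 - x)), np.arctanh(x))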
Example #3
def sigmoid_cross_entropy_with_logits(labels=None, logits=None):
    ##
    # \brief Computes sigmoid cross entropy given `logits`.
    #
    # \f[
    #   cost = labels * -log(sigmoid(logits)) + (1 - labels) * -log(1 - sigmoid(logits))
    # \f]
    # \param labels akg.tvm.Tensor of the same type and shape as `logits`.
    # \param  logits akg.tvm.Tensor of type float16, float32
    #
    # \return akg.tvm.Tensor of the same shape as `logits` with the componentwise logistic losses.
    ##

    if get_shape(logits) != get_shape(labels):
        raise ValueError(
            "logits and labels must have the same shape  (%s vs %s)" %
            (get_shape(logits), get_shape(labels)))
    if logits.dtype != labels.dtype:
        raise ValueError(
            "logits and labels must have the same dtype  (%s vs %s)" %
            (logits.dtype, labels.dtype))

    shape = logits.shape
    dtype = logits.dtype

    check_list = ["float16", "float32"]
    if not (dtype.lower() in check_list):
        raise RuntimeError(
            "sigmoid_cross_entropy_with_logits only support %s while dtype is %s"
            % (",".join(check_list), dtype))

    #    z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
    # =  z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
    # =  max(x, 0) - x * z + log(1 + exp(-abs(x)))

    zero = akg.tvm.const(0, dtype=dtype)
    relu_logits = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.expr.Select(
            logits(*indice) < zero, zero, logits(*indice)),
        name="relu_logits")
    neg_abs_logits = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.expr.Select(
            logits(*indice) < zero, logits(*indice),
            logits(*indice) * -1),
        name="neg_abs_logits")
    sigmoid_logits = exp(neg_abs_logits) + akg.tvm.const(1, dtype=dtype)
    ln_sigmoid_logits = log(sigmoid_logits)
    logits_mul_labels = mul(logits, labels)
    res = relu_logits - logits_mul_labels + ln_sigmoid_logits
    return res
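The rewritten form in the comment, max(x, 0) - x * z + log(1 + exp(-abs(x))), avoids overflow in exp for large |x| while remaining algebraically exact. A small NumPy sketch checking it against the naive formula in a range where both are representable:

import numpy as np

x = np.linspace(-5, 5, 11)
z = np.random.rand(11)
sig = 1 / (1 + np.exp(-x))
naive = z * -np.log(sig) + (1 - z) * -np.log(1 - sig)
stable = np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
assert np.allclose(naive, stable)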
Example #4
def pow_compute(input_x, input_y, data):
    """
    :param input_x:
    :param input_y:
    :return: exp(input_y * ln(input_x))
    """

    input_x_broadcast = akg.lang.ascend.broadcast(input_x, data.shape)
    log_value = log(input_x_broadcast, utils.CCE)
    mul_value = topi.multiply(input_y, log_value)
    res = Exp(mul_value, utils.CCE)

    return res
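The identity behind pow_compute, pow(x, y) = exp(y * ln(x)), holds for x > 0; a minimal NumPy check:

import numpy as np

x = np.abs(np.random.randn(8)) + 0.1   # keep the base strictly positive
y = np.random.randn(8)
assert np.allclose(np.power(x, y), np.exp(y * np.log(x)))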
Example #5
def log_ad(head, in_data):
    """
    Compute gradient of log operator using automatic differentiate.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32.
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor with the same shape as the input.
    """

    # Validate head's shape and dtype.
    vc_util.check_shape(head.shape)
    vc_util.ops_dtype_check(head.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    b = log.log(in_data)
    jacs = list(akg.differentiate(b, [in_data], head))
    return jacs[0]
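For b = log(x), the differentiation above should yield head * (1 / x). A NumPy sketch (our own reference, not the akg API) comparing the analytic gradient with a central finite difference:

import numpy as np

x = np.abs(np.random.randn(4)) + 0.5
head = np.ones_like(x)
grad = head / x                                       # d(log x)/dx = 1/x
eps = 1e-6
fd = (np.log(x + eps) - np.log(x - eps)) / (2 * eps)  # finite difference
assert np.allclose(grad, fd)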
Example #6
def log_compute_mini_impl(x):
    """log compute on mini for x >= 1"""

    # Compute method:
    # The vlog instruction loses precision for x in [1, 2), so a Taylor
    # expansion is used to compute log(x) on that interval.
    # For x in [1, 4/3), log(x) is evaluated by the Taylor formula, with d = x - 1:
    # log(1 + d) = ((((0.2d - 0.25)d + 1/3)d - 0.5)d + 1)d.
    # For x in [4/3, 5/3) and [5/3, 2), x is first mapped into [1, 4/3):
    # [4/3, 5/3) -> log(x * 3/4) + log(4/3),
    # [5/3, 2)   -> log(x * 3/5) + log(5/3).
    # For x in [2, 32768), log(x) is computed by the vlog instruction directly.
    # Since vlog overflows for x >= 32768, log(x) is computed there as:
    # [32768, +inf) -> log(x / 2.5) + log(2.5).
    thresholds = [4 / 3, 5 / 3, 2, 32768]
    thresholds_rec = [3 / 4, 3 / 5]
    log_thresholds = [0.28768207245178085, 0.5108256237659907]  # log(4/3), log(5/3)
    overflow_div_coefficient = 2.5
    log_overflow_div_coefficient = 0.916290731874155  # log(2.5)

    def _log_taylor(data):
        """algrithm: log(1+x) = ((((0.2x - 0.25)x + 0.33333)x - 0.5)x + 1)x"""
        data = topi.subtract(data, 1)
        taylor_params = [0.2, -0.25, 1 / 3, -0.5, 1]
        taylor_five = topi.multiply(data, taylor_params[0])
        taylor_four_1 = topi.add(taylor_five, taylor_params[1])
        taylor_four_2 = topi.multiply(taylor_four_1, data)
        taylor_three_1 = topi.add(taylor_four_2, taylor_params[2])
        taylor_three_2 = topi.multiply(taylor_three_1, data)
        taylor_two_1 = topi.add(taylor_three_2, taylor_params[3])
        taylor_two_2 = topi.multiply(taylor_two_1, data)
        taylor_one = topi.add(taylor_two_2, taylor_params[4])
        taylor = topi.multiply(taylor_one, data)
        return taylor

    # Taylor branch: reduce x into [1, 4/3), then evaluate the polynomial.
    shape = x.shape
    threshold_2 = tvm.const(thresholds[1], "float16")
    threshold_1 = tvm.const(thresholds[0], "float16")
    threshold_2_rec = tvm.const(thresholds_rec[1], "float16")
    threshold_1_rec = tvm.const(thresholds_rec[0], "float16")
    x_fp16 = topi.cast(x, "float16")
    x_1 = tvm.compute(shape,
                      lambda *indice: tvm.expr.Select(
                          x_fp16(*indice) >= threshold_2,
                          x_fp16(*indice) * threshold_2_rec, x_fp16(*indice)),
                      name="x_1")
    x_2 = tvm.compute(shape,
                      lambda *indice: tvm.expr.Select(
                          x_1(*indice) >= threshold_1,
                          x_1(*indice) * threshold_1_rec, x_1(*indice)),
                      name="x_2")
    taylor = _log_taylor(topi.cast(x_2, "float32"))
    log_threshold_1 = log_thresholds[0]
    log_threshold_2 = log_thresholds[1]
    taylor_add_log_threshold_1_fp16 = topi.cast(
        topi.add(taylor, log_threshold_1), "float16")
    taylor_add_log_threshold_2_fp16 = topi.cast(
        topi.add(taylor, log_threshold_2), "float16")
    res = tvm.compute(shape,
                      lambda *indice: tvm.expr.Select(
                          x_1(*indice) >= threshold_1,
                          taylor_add_log_threshold_1_fp16(*indice),
                          taylor(*indice).astype("float16")),
                      name="res_1")
    res = tvm.compute(
        shape,
        lambda *indice: tvm.expr.Select(
            x_fp16(*indice) >= threshold_2,
            taylor_add_log_threshold_2_fp16(*indice), res(*indice)),
        name="res_2")

    # vlog branch: x in [2, 32768).
    x_log = log.log(x_fp16)
    res = tvm.compute(
        shape,
        lambda *indice: tvm.expr.Select(
            x_fp16(*indice) >= thresholds[2], x_log(*indice), res(*indice)),
        name="res_3")

    # overflow branch: x >= 32768.
    overflow_threshold = tvm.const(thresholds[3], "float16")
    res_overflow = topi.cast(
        topi.add(log.log(topi.multiply(x, 1 / overflow_div_coefficient)),
                 log_overflow_div_coefficient), "float16")
    res = tvm.compute(shape,
                      lambda *indice: tvm.expr.Select(
                          x_fp16(*indice) >= overflow_threshold,
                          res_overflow(*indice), res(*indice)),
                      name="res_4")
    if res.dtype != x.dtype:
        res = topi.cast(res, x.dtype)
    return res
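The range-reduction constants and the Taylor polynomial above can be checked independently; a NumPy sketch (the helper taylor_log1p mirrors _log_taylor and is ours):

import numpy as np

# Range-reduction constants: log(4/3), log(5/3), log(2.5).
assert np.isclose(np.log(4 / 3), 0.28768207245178085)
assert np.isclose(np.log(5 / 3), 0.5108256237659907)
assert np.isclose(np.log(2.5), 0.916290731874155)

def taylor_log1p(d):
    """Degree-5 Horner form of log(1 + d), as in _log_taylor."""
    return ((((0.2 * d - 0.25) * d + 1 / 3) * d - 0.5) * d + 1) * d

# Accurate on the reduced interval [1, 4/3): the alternating-series error
# is bounded by d**6 / 6, well under 1e-4 for d < 1/3.
x = np.linspace(1.0, 4 / 3, 5, endpoint=False)
assert np.allclose(taylor_log1p(x - 1), np.log(x), atol=1e-4)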