def softmax_cross_entropy_with_logits(labels, logits, axis, reduction="mean", scale=1.0):
    """Compute softmax cross entropy and its gradient with respect to logits."""
    # Numerically stable log-softmax:
    # log_softmax(x) = (x - max(x)) - log(sum(exp(x - max(x)))).
    max_logits = reduce_max(logits, axis, keepdims=True, target=utils.CCE)
    data_sub = sub(logits, max_logits, target=utils.CCE)
    akg.register_variables("minus_max", [logits], data_sub)
    data_exp = Exp(data_sub, target=utils.CCE)
    data_expsum = sum(data_exp, axis, keepdims=True, target=utils.CCE)
    data_expsum_log = log(data_expsum, target=utils.CCE)
    sub_value = sub(data_sub, data_expsum_log, target=utils.CCE)

    # cross_entropy = -labels * log_softmax(logits)
    neg_labels = neg(labels, target=utils.CCE)
    cross_entropy = mul(neg_labels, sub_value, target=utils.CCE)

    # backprop: prob - labels, where prob = softmax(logits) = exp(log_softmax(logits))
    prob = Exp(sub_value, target=utils.CCE)
    backprop = sub(prob, labels, target=utils.CCE)

    if reduction.lower() == "none":
        loss = sum_v2(cross_entropy, axis, keepdims=True)
    elif reduction.lower() == "mean":
        loss = sum_v2(cross_entropy, axis=None)
        factor = logits.shape[0].value
        loss = loss * akg.tvm.const(1 / factor, logits.dtype)
        backprop = backprop * akg.tvm.const(1 / factor, logits.dtype)
    elif reduction.lower() == "sum":
        loss = sum_v2(cross_entropy, axis=None)
    else:
        raise ValueError(
            "reduction method {0} is not supported".format(reduction))
    backprop = akg.topi.multiply(backprop, akg.tvm.const(scale, backprop.dtype))
    return loss, backprop
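
# A minimal NumPy reference for the math above (illustrative sketch only, not part
# of the AKG kernel; assumes NumPy is available). With reduction="none":
# loss = -sum(labels * log_softmax(logits)) and backprop = softmax(logits) - labels.
def _softmax_xent_numpy_reference(labels, logits, axis=-1):
    import numpy as np
    shifted = logits - logits.max(axis=axis, keepdims=True)  # subtract max for stability
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))
    loss = -(labels * log_softmax).sum(axis=axis, keepdims=True)  # reduction="none"
    backprop = np.exp(log_softmax) - labels
    return loss, backprop
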
def _compute_log(data_input):
    """Atanh(x) = 0.5 * log((1 + x) / (1 - x))"""
    data_1_sum_x = topi.add(data_input, dc.one_const(data_input.dtype))
    data_sub_x = topi.multiply(data_input, dc.neg_one_const(data_input.dtype))
    data_1_sub_x = topi.add(data_sub_x, dc.one_const(data_input.dtype))
    data_x_mul = data_1_sum_x / data_1_sub_x
    data_x_log = log.log(data_x_mul)
    data_res = topi.multiply(data_x_log, dc.half_const(data_input.dtype))
    return data_res
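
# Illustrative NumPy check of the identity used above (assumes NumPy; valid for |x| < 1):
# atanh(x) = 0.5 * log((1 + x) / (1 - x)).
def _atanh_numpy_reference(x):
    import numpy as np
    return 0.5 * np.log((1.0 + x) / (1.0 - x))  # matches np.arctanh(x) for |x| < 1
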
def sigmoid_cross_entropy_with_logits(labels=None, logits=None):
    ##
    # \brief Computes sigmoid cross entropy given `logits`.
    #
    # \f[
    # cost = labels * -log(sigmoid(logits)) + (1 - labels) * -log(1 - sigmoid(logits))
    # \f]
    # \param labels akg.tvm.Tensor of the same type and shape as `logits`.
    # \param logits akg.tvm.Tensor of type float16, float32.
    #
    # \return akg.tvm.Tensor of the same shape as `logits` with the componentwise logistic losses.
    ##
    if get_shape(logits) != get_shape(labels):
        raise ValueError(
            "logits and labels must have the same shape (%s vs %s)" %
            (get_shape(logits), get_shape(labels)))
    if logits.dtype != labels.dtype:
        raise ValueError(
            "logits and labels must have the same dtype (%s vs %s)" %
            (logits.dtype, labels.dtype))

    shape = logits.shape
    dtype = logits.dtype
    check_list = ["float16", "float32"]
    if dtype.lower() not in check_list:
        raise RuntimeError(
            "sigmoid_cross_entropy_with_logits only supports %s while dtype is %s" %
            (",".join(check_list), dtype))

    # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
    # = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
    # = max(x, 0) - x * z + log(1 + exp(-abs(x)))
    zero = akg.tvm.const(0, dtype=dtype)
    relu_logits = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.expr.Select(
            logits(*indice) < zero, zero, logits(*indice)),
        name="relu_logits")
    neg_abs_logits = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.expr.Select(
            logits(*indice) < zero, logits(*indice), logits(*indice) * -1),
        name="neg_abs_logits")
    sigmoid_logits = exp(neg_abs_logits) + akg.tvm.const(1, dtype=dtype)
    ln_sigmoid_logits = log(sigmoid_logits)
    logits_mul_labels = mul(logits, labels)
    res = relu_logits - logits_mul_labels + ln_sigmoid_logits
    return res
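
# NumPy sketch of the stable formulation above (illustrative only; assumes NumPy):
# max(x, 0) - x * z + log(1 + exp(-|x|)) equals
# z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)),
# but never feeds a large positive argument to exp().
def _sigmoid_xent_numpy_reference(labels, logits):
    import numpy as np
    return np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits)))
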
def pow_compute(input_x, input_y, data):
    """
    Compute input_x ** input_y via the identity x ** y = exp(y * ln(x)).

    :param input_x: base tensor, broadcast to the shape of `data`
    :param input_y: exponent tensor
    :return: exp(input_y * ln(input_x))
    """
    input_x_broadcast = akg.lang.ascend.broadcast(input_x, data.shape)
    log_value = log(input_x_broadcast, utils.CCE)
    mul_value = topi.multiply(input_y, log_value)
    res = Exp(mul_value, utils.CCE)
    return res
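
# Illustrative NumPy equivalent of the identity above (assumes NumPy and a positive
# base, since ln(x) is only defined for x > 0):
def _pow_numpy_reference(x, y):
    import numpy as np
    return np.exp(y * np.log(x))  # agrees with np.power(x, y) for x > 0
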
def log_ad(head, in_data):
    """
    Compute gradient of log operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32.
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor with the same shape as the input.
    """
    # Check the validity of head.
    vc_util.check_shape(head.shape)
    vc_util.ops_dtype_check(head.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    b = log.log(in_data)
    jacs = list(akg.differentiate(b, [in_data], head))
    return jacs[0]
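
# The autodiff result should match the analytic gradient (sketch, assuming NumPy):
# d/dx log(x) = 1/x, so the backward pass is head / in_data elementwise.
def _log_grad_numpy_reference(head, in_data):
    import numpy as np
    return np.asarray(head) / np.asarray(in_data)
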
def log_compute_mini_impl(x):
    """log compute on mini for x >= 1"""
    # Compute method:
    # As the vlog instruction has precision problems when x is in the interval [1, 2),
    # a Taylor expansion is used to calculate log(x) there.
    # For x in [1, 4/3), compute log(x) by the Taylor formula:
    #     log(1+x) = ((((0.2x - 0.25)x + 0.33333)x - 0.5)x + 1)x.
    # For x in [4/3, 5/3) and [5/3, 2), x is first mapped into [1, 4/3) by:
    #     [4/3, 5/3) -> log(x * 3/4) + log(4/3),
    #     [5/3, 2)   -> log(x * 3/5) + log(5/3).
    # For x in [2, 32768), compute log(x) with the vlog instruction directly.
    # As the vlog instruction overflows when x >= 32768, compute log(x) there by:
    #     [32768, ) -> log(x / 2.5) + log(2.5).
    thresholds = [4 / 3, 5 / 3, 2, 32768]
    thresholds_rec = [3 / 4, 3 / 5]
    log_thresholds = [0.28768207245178085, 0.5108256237659907]
    overflow_div_coefficient = 2.5
    log_overflow_div_coefficient = 0.916290731874155

    def _log_taylor(data):
        """Algorithm: log(1+x) = ((((0.2x - 0.25)x + 0.33333)x - 0.5)x + 1)x"""
        data = topi.subtract(data, 1)
        taylor_params = [0.2, -0.25, 1 / 3, -0.5, 1]
        taylor_five = topi.multiply(data, taylor_params[0])
        taylor_four_1 = topi.add(taylor_five, taylor_params[1])
        taylor_four_2 = topi.multiply(taylor_four_1, data)
        taylor_three_1 = topi.add(taylor_four_2, taylor_params[2])
        taylor_three_2 = topi.multiply(taylor_three_1, data)
        taylor_two_1 = topi.add(taylor_three_2, taylor_params[3])
        taylor_two_2 = topi.multiply(taylor_two_1, data)
        taylor_one = topi.add(taylor_two_2, taylor_params[4])
        taylor = topi.multiply(taylor_one, data)
        return taylor

    # Taylor branch: range-reduce x into [1, 4/3), then apply the polynomial.
    shape = x.shape
    threshold_2 = tvm.const(thresholds[1], "float16")
    threshold_1 = tvm.const(thresholds[0], "float16")
    threshold_2_rec = tvm.const(thresholds_rec[1], "float16")
    threshold_1_rec = tvm.const(thresholds_rec[0], "float16")
    x_fp16 = topi.cast(x, "float16")
    x_1 = tvm.compute(shape, lambda *indice: tvm.expr.Select(
        x_fp16(*indice) >= threshold_2, x_fp16(*indice) * threshold_2_rec,
        x_fp16(*indice)), name="x_1")
    x_2 = tvm.compute(shape, lambda *indice: tvm.expr.Select(
        x_1(*indice) >= threshold_1, x_1(*indice) * threshold_1_rec,
        x_1(*indice)), name="x_2")

    taylor = _log_taylor(topi.cast(x_2, "float32"))
    log_threshold_1 = log_thresholds[0]
    log_threshold_2 = log_thresholds[1]
    taylor_add_log_threshold_1_fp16 = topi.cast(
        topi.add(taylor, log_threshold_1), "float16")
    taylor_add_log_threshold_2_fp16 = topi.cast(
        topi.add(taylor, log_threshold_2), "float16")
    res = tvm.compute(shape, lambda *indice: tvm.expr.Select(
        x_1(*indice) >= threshold_1,
        taylor_add_log_threshold_1_fp16(*indice),
        taylor(*indice).astype("float16")), name="res_1")
    res = tvm.compute(shape, lambda *indice: tvm.expr.Select(
        x_fp16(*indice) >= threshold_2,
        taylor_add_log_threshold_2_fp16(*indice),
        res(*indice)), name="res_2")

    # vlog branch: accurate on [2, 32768).
    x_log = log.log(x_fp16)
    res = tvm.compute(shape, lambda *indice: tvm.expr.Select(
        x_fp16(*indice) >= thresholds[2], x_log(*indice), res(*indice)),
        name="res_3")

    # Overflow branch: log(x) = log(x / 2.5) + log(2.5) for x >= 32768.
    overflow_threshold = tvm.const(thresholds[3], "float16")
    res_overflow = topi.cast(
        topi.add(log.log(topi.multiply(x, 1 / overflow_div_coefficient)),
                 log_overflow_div_coefficient), "float16")
    res = tvm.compute(shape, lambda *indice: tvm.expr.Select(
        x_fp16(*indice) >= overflow_threshold, res_overflow(*indice),
        res(*indice)), name="res_4")

    if res.dtype != x.dtype:
        res = topi.cast(res, x.dtype)
    return res
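
# Scalar NumPy sketch of the piecewise scheme above (illustrative only; the real
# kernel operates on whole tensors with Select chains, and the fp16 casts are
# omitted here):
def _log_mini_numpy_reference(x):
    import numpy as np
    if x >= 32768:            # overflow branch: log(x) = log(x / 2.5) + log(2.5)
        return np.log(x / 2.5) + 0.916290731874155
    if x >= 2:                # vlog branch is accurate on [2, 32768)
        return np.log(x)
    extra = 0.0
    if x >= 5 / 3:            # map [5/3, 2) into [1, 4/3): log(x) = log(x * 3/5) + log(5/3)
        x, extra = x * 3 / 5, 0.5108256237659907
    elif x >= 4 / 3:          # map [4/3, 5/3) into [1, 4/3): log(x) = log(x * 3/4) + log(4/3)
        x, extra = x * 3 / 4, 0.28768207245178085
    t = x - 1                 # Taylor: log(1+t) = ((((0.2t - 0.25)t + 1/3)t - 0.5)t + 1)t
    return ((((0.2 * t - 0.25) * t + 1 / 3) * t - 0.5) * t + 1) * t + extra
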