def extract_multi_position_matrix_nd(bbox):
    bbox = nd.transpose(bbox, axes=(1, 0, 2))
    # [num_fg_classes, num_boxes, 1]
    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=2)
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)
    # [num_fg_classes, num_boxes, num_boxes]
    delta_x = nd.broadcast_minus(lhs=center_x, rhs=nd.transpose(center_x, axes=(0, 2, 1)))
    delta_x = nd.broadcast_div(delta_x, bbox_width)
    delta_x = nd.log(nd.maximum(nd.abs(delta_x), 1e-3))
    delta_y = nd.broadcast_minus(lhs=center_y, rhs=nd.transpose(center_y, axes=(0, 2, 1)))
    delta_y = nd.broadcast_div(delta_y, bbox_height)
    delta_y = nd.log(nd.maximum(nd.abs(delta_y), 1e-3))
    delta_width = nd.broadcast_div(lhs=bbox_width, rhs=nd.transpose(bbox_width, axes=(0, 2, 1)))
    delta_width = nd.log(delta_width)
    delta_height = nd.broadcast_div(lhs=bbox_height, rhs=nd.transpose(bbox_height, axes=(0, 2, 1)))
    delta_height = nd.log(delta_height)
    concat_list = [delta_x, delta_y, delta_width, delta_height]
    for idx, sym in enumerate(concat_list):
        concat_list[idx] = nd.expand_dims(sym, axis=3)
    position_matrix = nd.concat(*concat_list, dim=3)
    return position_matrix
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    num_train = X_train.shape[0]
    clipped_preds = nd.clip(net(X_train), 1, float('inf'))
    return np.sqrt(2 * nd.sum(square_loss(
        nd.log(clipped_preds), nd.log(y_train))).asscalar() / num_train)
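A quick sanity check of get_rmse_log, assuming the snippet's square_loss is gluon's L2Loss (whose 0.5 factor the `2 *` above cancels): a stand-in "net" that predicts the targets exactly should give an RMSE of ~0. All names below are illustrative, not from the original training script.

import numpy as np
from mxnet import nd
from mxnet.gluon import loss as gloss

square_loss = gloss.L2Loss()  # assumption: square_loss is L2Loss
X_train = nd.array([[1.0], [2.0]])
y_train = nd.array([[3.0], [6.0]])
perfect_net = lambda X: 3 * X  # hypothetical model that happens to be exact
print(get_rmse_log(perfect_net, X_train, y_train))  # ~0.0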
def forward(self, x):
    # Because this encoder-decoder setup uses convolutional layers,
    # there is no need to flatten anything.
    # x.shape = (batch_size, n_channels, width, height)

    # Get the latent layer
    latent_layer = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable with the reparametrization trick applied
    eps = nd.random_normal(0, 1, shape=(x.shape[0], self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Compute the KL divergence between the latent variable and the standard normal
    kl_div_loss = -0.5 * nd.sum(
        1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar), axis=1)

    # Use the decoder to generate output
    x_hat = self.decoder(latent_z.reshape((x.shape[0], self.n_latent, 1, 1)))

    # Compute the pixel-by-pixel loss; this requires that x and x_hat be flattened
    x_flattened = x.reshape((x.shape[0], -1))
    x_hat_flattened = x_hat.reshape((x_hat.shape[0], -1))
    logloss = -nd.sum(x_flattened * nd.log(x_hat_flattened + 1e-10)
                      + (1 - x_flattened) * nd.log(1 - x_hat_flattened + 1e-10), axis=1)

    # Sum up the loss
    loss = kl_div_loss + logloss * self.pbp_weight
    return loss
def _neg_loss(pred, gt):
    '''Modified focal loss, exactly the same as in CornerNet.
    Runs faster and costs a little more memory.

    Arguments:
        pred (batch x c x h x w)
        gt (batch x c x h x w)
    '''
    pos_inds = (gt == 1).astype('float32')
    neg_inds = (gt < 1).astype('float32')

    neg_weights = nd.power(1 - gt, 4)

    loss = 0
    pos_loss = nd.log(pred) * nd.power(1 - pred, 2) * pos_inds
    neg_loss = nd.log(1 - pred) * nd.power(pred, 2) * neg_weights * neg_inds

    num_pos = pos_inds.sum()
    pos_loss = pos_loss.sum()
    neg_loss = neg_loss.sum()

    if num_pos == 0:
        loss = loss - neg_loss
    else:
        loss = loss - (pos_loss + neg_loss) / num_pos
    return loss
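A small smoke test for _neg_loss with illustrative values: a confident prediction at the single positive location in gt gives a loss near zero, while a poor prediction inflates it.

from mxnet import nd

gt = nd.array([[[[0.0, 0.0], [0.0, 1.0]]]])            # batch x c x h x w heatmap
good_pred = nd.array([[[[0.01, 0.01], [0.01, 0.99]]]])
bad_pred = nd.array([[[[0.9, 0.9], [0.9, 0.1]]]])
print(_neg_loss(good_pred, gt))  # close to 0
print(_neg_loss(bad_pred, gt))   # much larger (~7.5)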
def bbox_transform(ex_rois, gt_rois):
    """
    compute bounding box regression targets from ex_rois to gt_rois
    :param ex_rois: [batch_size, N, 4]
    :param gt_rois: [batch_size, N, 4]
    :return: [batch_size, N, 4]
    """
    ex_widths = ex_rois[:, :, 2:3] - ex_rois[:, :, 0:1] + 1.0
    ex_heights = ex_rois[:, :, 3:4] - ex_rois[:, :, 1:2] + 1.0
    ex_ctr_x = ex_rois[:, :, 0:1] + 0.5 * (ex_widths - 1.0)
    ex_ctr_y = ex_rois[:, :, 1:2] + 0.5 * (ex_heights - 1.0)

    gt_widths = gt_rois[:, :, 2:3] - gt_rois[:, :, 0:1] + 1.0
    gt_heights = gt_rois[:, :, 3:4] - gt_rois[:, :, 1:2] + 1.0
    gt_ctr_x = gt_rois[:, :, 0:1] + 0.5 * (gt_widths - 1.0)
    gt_ctr_y = gt_rois[:, :, 1:2] + 0.5 * (gt_heights - 1.0)

    targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14)
    targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14)
    targets_dw = nd.log(gt_widths / ex_widths)
    targets_dh = nd.log(gt_heights / ex_heights)

    targets = nd.concat(targets_dx, targets_dy, targets_dw, targets_dh, dim=-1)
    return targets
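As a quick sanity check of bbox_transform: encoding a set of boxes against themselves yields all-zero targets, since the centers coincide and the width/height ratios are 1 (so the logs vanish).

from mxnet import nd

ex = nd.array([[[0., 0., 9., 9.], [10., 10., 29., 19.]]])  # [1, 2, 4]
print(bbox_transform(ex, ex))  # every target is ~0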
def label_offset(anchors, bbox, match, sample,
                 means=(0, 0, 0, 0), stds=(0.1, 0.1, 0.2, 0.2), flatten=True):
    anchors = anchors.reshape((-1, 4))
    N, _ = anchors.shape
    B, M, _ = bbox.shape
    anchor_x, anchor_y, anchor_w, anchor_h = corner_to_center(anchors, split=True)

    bbox = bbox.reshape((B, 1, M, 4))
    bbox = nd.broadcast_to(bbox, (B, N, M, 4))
    bbox = nd.stack(*[nd.pick(bbox[:, :, :, p], match) for p in range(4)], axis=-1)
    bbox_x, bbox_y, bbox_w, bbox_h = corner_to_center(bbox, split=True)

    offset_x = ((bbox_x - anchor_x) / anchor_w - means[0]) / stds[0]
    offset_y = ((bbox_y - anchor_y) / anchor_h - means[1]) / stds[1]
    offset_w = (nd.log(bbox_w / anchor_w) - means[2]) / stds[2]
    offset_h = (nd.log(bbox_h / anchor_h) - means[3]) / stds[3]
    offset = nd.concat(*(offset_x, offset_y, offset_w, offset_h), dim=-1)

    sample = sample.reshape((B, N, 1))
    sample = nd.broadcast_to(sample, (B, N, 4)) > 0.5

    anchor_offset = nd.where(sample, offset, nd.zeros_like(offset))
    anchor_mask = nd.where(sample, nd.ones_like(offset), nd.zeros_like(offset))

    if flatten:
        anchor_offset = anchor_offset.reshape((B, -1))
        anchor_mask = anchor_mask.reshape((B, -1))
    return anchor_mask, anchor_offset
def loc_difference_calculator(anchor_locs, label_locs, variances=(0.1, 0.1, 0.4, 0.4)):
    '''Compute the location offsets between ground-truth boxes and default anchor boxes.'''
    al = anchor_locs[:, 0]  # anchor box; left (or: x_min)
    at = anchor_locs[:, 1]  # anchor box; top (or: y_min)
    ar = anchor_locs[:, 2]  # anchor box; right (or: x_max)
    ab = anchor_locs[:, 3]  # anchor box; bottom (or: y_max)
    gl = label_locs[:, 0]  # ground truth box; left (or: x_min)
    gt = label_locs[:, 1]  # ground truth box; top (or: y_min)
    gr = label_locs[:, 2]  # ground truth box; right (or: x_max)
    gb = label_locs[:, 3]  # ground truth box; bottom (or: y_max)

    # For the anchor boxes, convert x_min, y_min, x_max, y_max into
    # cx (center x), cy (center y), w (width), h (height).
    aw = ar - al
    ah = ab - at
    ax = 0.5 * (al + ar)
    ay = 0.5 * (at + ab)

    # For the ground-truth boxes, convert x_min, y_min, x_max, y_max into
    # cx (center x), cy (center y), w (width), h (height).
    gw = gr - gl
    gh = gb - gt
    gx = 0.5 * (gl + gr)
    gy = 0.5 * (gt + gb)

    # Compute the offsets between the anchors and the ground-truth boxes. We do not
    # want the model to overemphasize width and height, so we take the log to
    # shrink those differences.
    dx = (gx - ax) / aw / variances[0]
    dy = (gy - ay) / ah / variances[1]
    dw = nd.log(gw / aw) / variances[2]
    dh = nd.log(gh / ah) / variances[3]
    return nd.stack(dx, dy, dw, dh, axis=1)
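An illustrative call with one anchor and one ground-truth box of equal size: dw and dh are zero, and the unit center shift is divided by the width/height and by the 0.1 variance.

from mxnet import nd

anchors = nd.array([[0., 0., 4., 4.]])  # x_min, y_min, x_max, y_max
labels = nd.array([[1., 1., 5., 5.]])
print(loc_difference_calculator(anchors, labels))  # [[2.5, 2.5, 0., 0.]]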
def bayes_pred_stable(x, P_y, P_xy):
    log_P_xy = nd.log(P_xy)
    log_P_xy_neg = nd.log(1 - P_xy)
    log_P_y = nd.log(P_y)
    x = x.expand_dims(axis=0)  # (28, 28) -> (1, 28, 28)
    p_xy = log_P_xy * x + log_P_xy_neg * (1 - x)
    p_xy = p_xy.reshape((10, -1)).sum(axis=1)  # p(x|y)
    return p_xy + log_P_y
def bbox_delta(self, bbox_a, bbox_b):
    n = bbox_a.shape[0]
    result = nd.zeros((n, 4), ctx=bbox_a.context)
    result[:, 0] = bbox_a[:, 0] - bbox_b[:, 0]
    result[:, 1] = bbox_a[:, 1] - bbox_b[:, 1]
    result[:, 2] = nd.log((bbox_a[:, 2] - bbox_a[:, 0] + 1e-8) /
                          (bbox_b[:, 2] - bbox_b[:, 0] + 1e-8))
    result[:, 3] = nd.log((bbox_a[:, 3] - bbox_a[:, 1] + 1e-8) /
                          (bbox_b[:, 3] - bbox_b[:, 1] + 1e-8))
    return result
def select_action(self, state):
    with autograd.record():
        probs = self.model(state.as_in_context(model_ctx))
        action = nd.random.multinomial(probs)
        prob = probs[:, action[0]].reshape((1, -1))
        log_prob = nd.log(prob)
        entropy = -(probs * nd.log(probs)).sum()
    return action[0], log_prob, entropy
def predict(img, P_xy, P_y):
    img = img.expand_dims(axis=0)
    log_P_xy = nd.log(P_xy)
    neg_log_P_xy = nd.log(1 - P_xy)
    pxy = log_P_xy * img + neg_log_P_xy * (1 - img)
    pxy = pxy.reshape((10, -1)).sum(axis=1)
    probs = pxy + nd.log(P_y)
    return probs.argmax(axis=0).asscalar()
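A hedged usage sketch for predict (and the near-identical bayes_pred_stable above): P_xy holds per-class Bernoulli pixel probabilities and P_y the class priors. The random values below are stand-ins for trained estimates, used only to show the expected shapes.

from mxnet import nd

P_xy = nd.random.uniform(0.1, 0.9, shape=(10, 28, 28))  # fake per-class pixel probs
P_y = nd.ones(10) / 10.                                  # uniform class prior
img = (nd.random.uniform(shape=(28, 28)) > 0.5).astype('float32')
print(predict(img, P_xy, P_y))  # a class id in 0..9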
def triplet_hard_loss(net, data, label):
    label = label.reshape(-1, 1)
    label_mat = nd.equal(label, label.T).astype('float32')
    vec = net(data)
    dist_self = nd.sum(nd.square(vec), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(dist_self, dist_self.T) - 2 * nd.dot(vec, vec.T)
    p_min = nd.log(nd.sum(label_mat * nd.exp(dist_mat), axis=1))
    p_max = nd.log(nd.sum((1 - label_mat) * nd.exp(-dist_mat), axis=1))
    loss = nd.relu(p_min + p_max + 1)
    return loss
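A minimal smoke test, assuming net can be any callable that maps a batch to embeddings; an identity lambda stands in for a real network here.

from mxnet import nd

data = nd.random.normal(shape=(4, 8))
label = nd.array([0, 0, 1, 1])
print(triplet_hard_loss(lambda x: x, data, label))  # one loss value per sample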
def gan_mse(p, g, device):
    # Alternative objectives that were tried:
    # return (p - mx.nd.ones_like(p, ctx=device)) ** 2 if g == 'real' else (p - mx.nd.zeros_like(p, ctx=device)) ** 2
    # return mx.nd.abs(p - mx.nd.ones_like(p, ctx=device)) if g == 'real' else mx.nd.abs(p - mx.nd.zeros_like(p, ctx=device))
    g = mx.nd.ones_like(p) if g == 'real' else mx.nd.zeros_like(p)
    # Label-smoothing variant:
    # g = mx.nd.ones_like(p) + mx.nd.random.normal(loc=0, scale=0.1, ctx=device) if g == 'real' else \
    #     mx.nd.zeros_like(p) + mx.nd.random.normal(loc=0, scale=0.1, ctx=device)
    # Despite the name, the active objective is binary cross-entropy on clipped probabilities.
    return -nd.clip(g, 0, 1) * nd.log(nd.clip(p, 1e-5, 1)) \
           - (1 - nd.clip(g, 0, 1)) * nd.log(nd.clip(1 - p, 1e-5, 1))
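Illustrative calls: confident predictions on the matching target give small losses. Note the device argument only affects the commented-out variants.

import mxnet as mx
from mxnet import nd

p = nd.array([0.9, 0.1])
print(gan_mse(p, 'real', mx.cpu()))  # ~[0.105, 2.303]
print(gan_mse(p, 'fake', mx.cpu()))  # ~[2.303, 0.105]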
def forward(self, x, first_cycle=False):
    # The input x is an image, and thus a 4-dimensional ndarray
    batch_size, n_channels_in, input_width, input_height = x.shape

    # First run it through the encoder
    x_flattened = x.reshape(batch_size, -1)
    latent_layer = self.encoder(x_flattened)

    # Split the latent layer into latent mean and latent log variances
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable's value using the reparametrization trick
    eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # At this point, also compute the KL divergence between the latent variable
    # and Gaussian(0, 1)
    KL_div_loss = -0.5 * nd.sum(
        1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar), axis=1)

    # Run the latent variable through the decoder to get the flattened generated image
    x_hat_flattened = self.decoder(latent_z)

    # Inflate the flattened output to be fed into the discriminator
    x_hat = x_hat_flattened.reshape(batch_size, n_channels_in, input_width, input_height)

    # The content loss is given by the ResNet discriminator. Later in training we
    # feed the discriminator genuine and generated images, with genuine images
    # labeled 1 and generated images labeled 0; a higher output therefore indicates
    # higher confidence that an image is real, so we minimize its negative.
    content_loss = -nd.sigmoid(self.discriminator(x_hat)).reshape(-1)

    # During the first training cycle the ResNet is not trained at all, so we do
    # not use it as a content-loss metric; instead we use the log loss.
    if first_cycle:
        content_loss = -nd.sum(
            x_flattened * nd.log(x_hat_flattened + 1e-10)
            + (1 - x_flattened) * nd.log(1 - x_hat_flattened + 1e-10), axis=1)

    # The loss is the sum of the KL divergence and the content loss
    loss = KL_div_loss + content_loss
    return loss
def regularisation(self):
    """Computes weights and dropout regularisation for the layer; has to be
    extracted for each layer within the model and added to the total loss.
    """
    with autograd.record():
        weights_regularizer = self.weight_regularizer * self._sum_n_square() / (1 - self.p)
        dropout_regularizer = self.p * nd.log(self.p)
        dropout_regularizer = dropout_regularizer + (1. - self.p) * nd.log(1. - self.p)
        dropout_regularizer = dropout_regularizer * self.dropout_regularizer * self.input_dim
        regularizer = weights_regularizer + dropout_regularizer
    self.reg_all.append(regularizer)
    return regularizer
def loglik(self, dat, nobs=100, nmc=30, nstep=10):
    if nobs > dat.shape[0]:
        nobs = dat.shape[0]
    ind = np.random.choice(dat.shape[0], nobs, replace=False)
    samp = RBMSampler(self.w, self.b, self.c, ctx=self.ctx)
    loglik = 0.0
    for i in range(nobs):
        vi = dat[ind[i]]
        v, h = samp.sample_k(vi.reshape(1, -1).repeat(nmc, axis=0), nstep)
        vmean = nd.sigmoid(nd.dot(h, self.w.T) + self.b)
        logp = nd.log(vmean) * vi + nd.log(1 - vmean) * (1 - vi)
        logp = nd.sum(logp, axis=1)
        loglik += log_sum_exp(logp)
    return loglik - nobs * math.log(nmc)
def __init__(self, droprate_init, temperature, limit_lo, limit_hi, **kwargs):
    super(Gate, self).__init__(**kwargs)
    self._temperature = nd.array([temperature])
    self._limit_lo = nd.array([limit_lo])
    self._limit_hi = nd.array([limit_hi])
    droprate_init = nd.array([droprate_init])
    qz_loga = nd.log(1 - droprate_init) - nd.log(droprate_init)
    with self.name_scope():
        self._qz_loga = self.params.get("qz_loga",
                                        init=mx.init.Constant(qz_loga),
                                        allow_deferred_init=True)
    self._init_param("qz_loga", qz_loga)
def forward(self, x=0):
    if mx.autograd.is_training():
        u = nd.random.uniform(0, 1)
        s = nd.log(u) - nd.log(1 - u) + self._qz_loga.data()
        if self._temperature == 0:
            s = nd.sign(s)
        else:
            s = nd.sigmoid(s / self._temperature)
    else:
        s = nd.sigmoid(self._qz_loga.data())
    s = s * (self._limit_hi - self._limit_lo) + self._limit_lo
    return nd.minimum(1, nd.maximum(s, 0))
def poisson(self, n, lam):
    r"""
    The continuous approximation, using :math:`n! = \Gamma\left(n+1\right)`,
    to the probability mass function of the Poisson distribution evaluated
    at :code:`n` given the parameter :code:`lam`.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend(pyhf.tensor.mxnet_backend())
        >>> pyhf.tensorlib.poisson(5., 6.)
        <BLANKLINE>
        [0.16062315]
        <NDArray 1 @cpu(0)>

    Args:
        n (Number or Tensor): The value at which to evaluate the approximation
            to the Poisson distribution p.m.f. (the observed number of events)
        lam (Number or Tensor): The mean of the Poisson distribution p.d.f.
            (the expected number of events)

    Returns:
        MXNet NDArray: Value of the continuous approximation to Poisson(n|lam)
    """
    n = self.astensor(n)
    lam = self.astensor(lam)

    # This is currently copied directly from PyTorch's source until a better
    # way can be found to do this in MXNet
    # https://github.com/pytorch/pytorch/blob/39520ffec15ab7e97691fed048de1832e83785e8/torch/distributions/poisson.py#L59-L63
    return nd.exp((nd.log(lam) * n) - lam - nd.gammaln(n + 1.0))
def dynamic_range_compression(x, c=1, clip_val=1e-5):
    """
    params
    ------
    c: compression factor
    clip_val: lower bound for clipping, to avoid log(0)
    """
    return nd.log(nd.clip(x, a_min=clip_val, a_max=x.max().asscalar())) * c
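A brief usage sketch: values below clip_val are clipped before the log, so zeros in the input do not produce -inf.

from mxnet import nd

x = nd.array([0.0, 1e-6, 0.5, 2.0])
print(dynamic_range_compression(x))  # log of x clipped to [1e-5, x.max()]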
def forward(self, pred, target):
    """pred is the output probability; target is the multi-class set label."""
    batch, dim = pred.shape
    dist = nd.broadcast_minus(pred.reshape(batch, dim, 1),
                              pred.reshape(batch, 1, dim))
    pos = mxnet.nd.greater(target, 0).reshape(batch, dim, 1)
    neg = mxnet.nd.equal(target, 0).reshape(batch, 1, dim)
    # detach() returns a copy cut off from the graph; it must be assigned back
    pos = pos.detach()
    neg = neg.detach()
    # Equivalent formulation via explicit matrices:
    # pos_matrix = mxnet.nd.concat(*([pos] * dim), dim=2)
    # neg_matrix = mxnet.nd.concat(*([neg] * dim), dim=1)
    # loss_matrix = nd.log(1 + nd.sum(pos_matrix * neg_matrix * nd.exp(-dist)))
    loss_matrix = nd.log(1 + nd.sum(nd.broadcast_mul(pos, neg) * nd.exp(-dist)))
    return loss_matrix
def hybrid_forward(self, F, output, *args, **kwargs):
    '''
    Returns the Softmax Cross Entropy loss of a model with a graph vocab,
    in the style of a sentinel pointer network.

    Note: Unlike VarNamingLoss, this Loss DOES expect the last dimension of
    output to be probabilities summing to 1
    '''
    (label, _), data_encoder = args
    joint_label, label_lengths = label.values, label.value_lengths
    # We're using pick and not just sparse labels for XEnt b/c there can be
    # multiple ways to point to the correct subtoken
    loss = nd.pick(output, joint_label, axis=2)

    # Masking outputs to max(length_of_output (based on emitting value 0), length_of_label)
    output_preds = nd.argmax(output, axis=2).asnumpy()
    output_lengths = []
    for row in output_preds:
        end_token_idxs = np.where(row == 0)[0]
        if len(end_token_idxs):
            output_lengths.append(int(min(end_token_idxs)) + 1)
        else:
            output_lengths.append(output.shape[1])
    output_lengths = nd.array(output_lengths, ctx=output.context)
    mask_lengths = nd.maximum(output_lengths, label_lengths)
    loss = nd.SequenceMask(loss,
                           value=1.0,
                           use_sequence_length=True,
                           sequence_length=mask_lengths,
                           axis=1)

    return nd.mean(-nd.log(loss), axis=0, exclude=True)
def forward(self, rcnn_cls_pred, rcnn_bbox_pred, rcnn_cls_gt, rcnn_bbox_gt):
    with autograd.pause():
        ctx = rcnn_cls_pred.context
        roi_num = rcnn_cls_pred.shape[0]
        roi_idx = nd.arange(roi_num, ctx=ctx).reshape(-1, 1)
        fg_bbox_mask = (rcnn_cls_gt > 0).reshape(0, 1, 1)
        bbox_weights = nd.zeros_like(rcnn_bbox_gt).reshape(0, -1, 4)
        bbox_weights[roi_idx, rcnn_cls_gt[:], :] = \
            self._bbox_weights.data(ctx).broadcast_to((roi_num, 1, 4)) * fg_bbox_mask
        bbox_weights = bbox_weights.reshape(0, -1)

    # rcnn_cls_pred.shape (roi_num, num_classes)
    rcnn_cls_log = nd.log(nd.clip(rcnn_cls_pred, 1e-14, 1))
    cls_log_loss = -nd.sum(rcnn_cls_log[roi_idx, rcnn_cls_gt]) / self._roi_batch_size.data(ctx)

    # rcnn_bbox_pred.shape (roi_num, num_classes*4)
    rcnn_bbox_smooth_l1 = nd.smooth_l1(rcnn_bbox_pred - rcnn_bbox_gt, scalar=1.0)
    bbox_smooth_l1_loss = nd.sum(rcnn_bbox_smooth_l1 * bbox_weights) / self._roi_batch_size.data(ctx)

    return cls_log_loss, bbox_smooth_l1_loss
def coordinate_distance(target, label):
    target_xy, target_wh = nd.split(target, 2, -1)
    label_xy, label_wh = nd.split(label, 2, -1)
    dxy = target_xy - label_xy
    dwh = nd.log(target_wh / label_wh)
    distance = nd.concat(dxy, dwh, dim=-1)
    return distance
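Illustrative usage, assuming boxes are laid out as (cx, cy, w, h) along the last axis:

from mxnet import nd

target = nd.array([[5., 5., 4., 4.]])
label = nd.array([[4., 4., 2., 2.]])
print(coordinate_distance(target, label))  # [[1., 1., log 2, log 2]]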
def sample(match, cls_pred, iou, ratio=3, min_sample=0, threshold=0.5, do=True):
    if do is False:
        ones = nd.ones_like(match)
        sample = nd.where(match > -0.5, ones, ones * -1)
        return sample

    sample = nd.zeros_like(match)
    num_pos = nd.sum(match > -0.5, axis=-1)
    require_neg = ratio * num_pos
    neg_mask = nd.where(match < -0.5, nd.max(iou, axis=-1) < threshold, sample)
    max_neg = neg_mask.sum(axis=-1)
    num_neg = nd.minimum(max_neg, nd.maximum(require_neg, min_sample)).astype('int')

    neg_prob = cls_pred[:, :, 0]
    max_value = nd.max(cls_pred, axis=-1, keepdims=True)
    score = max_value[:, :, 0] - neg_prob + nd.log(
        nd.sum(nd.exp(cls_pred - max_value), axis=-1))

    score = nd.where(neg_mask, score, nd.zeros_like(score))
    argmax = nd.argsort(score, axis=-1, is_ascend=False)
    sample = nd.where(match > -0.5, nd.ones_like(sample), sample)

    for i, num in enumerate(num_neg):
        sample[i, argmax[i, :num.asscalar()]] = -1
    return sample
def forward(self, C):
    # The input is of shape (batch_size, num_matches*emb_size, num_candidates),
    # with num_matches=3, num_candidates=2 in our case
    exp_C = nd.exp(nd.dot(C.transpose(axes=(0, 2, 1)), self.V.data()))
    # L(A_i | P, Q) = -log(exp(V^T C_i) / sum_j exp(V^T C_j))
    L = -nd.log(exp_C / nd.sum(exp_C, axis=-1, keepdims=True))
    return L
def scale_mixture_prior(self, x):
    sigma_p1 = nd.array([self.sigma_p1], ctx=ctx)
    sigma_p2 = nd.array([self.sigma_p2], ctx=ctx)
    pi = self.pi

    first_gaussian = pi * self.gaussian(x, 0., sigma_p1)
    second_gaussian = (1 - pi) * self.gaussian(x, 0., sigma_p2)

    return nd.log(first_gaussian + second_gaussian)
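For reference, a self-contained sketch of the same scale-mixture log-prior with the Gaussian density written out explicitly; pi and the sigma values are illustrative stand-ins for the object's attributes, and gaussian_pdf is assumed to match self.gaussian.

import math
from mxnet import nd

def scale_mixture_log_prior(x, pi=0.5, sigma1=1.0, sigma2=0.0025):
    def gaussian_pdf(x, sigma):
        # N(x | 0, sigma): standard normal density with mean 0
        return nd.exp(-x ** 2 / (2 * sigma ** 2)) / (math.sqrt(2 * math.pi) * sigma)
    return nd.log(pi * gaussian_pdf(x, sigma1) + (1 - pi) * gaussian_pdf(x, sigma2))

print(scale_mixture_log_prior(nd.array([0.0, 0.1])))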
def __init__(self, parent=None, min_list=None, max_list=None, tau=None, **kwargs):
    super(Box, self).__init__(**kwargs)
    inf = -1 * nd.log(nd.array([0]))  # positive infinity, since log(0) = -inf
    with self.name_scope():
        self._parent = parent
        self._min_list = self.params.get("min_list",
                                         grad_req="null",
                                         init=mx.init.Constant(min_list),
                                         allow_deferred_init=True)
        self._max_list = self.params.get("max_list",
                                         grad_req="null",
                                         init=mx.init.Constant(max_list),
                                         allow_deferred_init=True)
        self._tau = self.params.get("tau",
                                    grad_req="null",
                                    init=mx.init.Constant(tau if tau is not None else inf),
                                    allow_deferred_init=True)
    self._init_param("min_list", min_list)
    self._init_param("max_list", max_list)
    self._init_param("tau", tau if tau is not None else inf)
def log_prob(self, x: nd.NDArray) -> nd.NDArray:
    mean = self.get_param_maybe_repeated('mean')
    if x.ndim > mean.ndim:
        mean = nd.expand_dims(mean, 0)
    np_x = x.asnumpy().astype(np.int32).astype(np.float32)
    np.testing.assert_almost_equal(x.asnumpy(), np_x)
    # Poisson log-likelihood: x * log(mean) - mean - log(x!)
    return x * nd.log(mean) - mean - nd.gammaln(x + 1.)
def test_exponent_logarithm_operators():
    a = 2 * nd.ones(shape=LARGE_X)
    # exponent
    result = nd.exp(a)
    assert result[-1] == 7.389056
    assert result.shape == a.shape
    # exponent minus 1
    result = nd.expm1(a)
    assert result[-1] == 6.389056
    assert result.shape == a.shape
    # log2
    result = nd.log2(a)
    assert result[-1] == 1
    assert result.shape == a.shape
    # log10
    result = nd.log10(a)
    assert result[-1] == 0.30103
    assert result.shape == a.shape
    # log1p
    result = nd.log1p(a)
    assert result[-1] == 1.0986123
    assert result.shape == a.shape
    # log
    result = nd.log(a)
    assert result[-1] == 0.6931472
    assert result.shape == a.shape
def cross_entropy(yhat, y):
    return -nd.mean(nd.sum(y * nd.log(yhat), axis=0, exclude=True))
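An illustrative call: yhat holds per-class probabilities and y the one-hot labels, so the result is the mean of -log(0.7) and -log(0.8), about 0.29.

from mxnet import nd

yhat = nd.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
y = nd.array([[1, 0, 0], [0, 1, 0]])
print(cross_entropy(yhat, y))  # ~0.29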