def hybrid_forward(self, F, supports, queries, sample_weight=None):
    """
    Computes the prototypical loss.

    :param F: mx.nd or mx.sym
    :param supports: support embeddings, <Nc*Ns x E>
    :param queries: query embeddings, <Nc*Nq x E>
    :param sample_weight: weights of logits, see mx.loss
    :return: loss
    """
    supports = F.reshape(supports, (self.nc, self.ns, -1))  # <Nc x Ns x E>
    prototypes = F.mean(supports, axis=1)  # <Nc x E>

    # Squared distances between queries and prototypes via the expansion
    # ||q - p||^2 = ||q||^2 + ||p||^2 - 2 * q.p
    square_queries = queries.square().sum(axis=1, keepdims=True)  # <Nc*Nq x 1>
    square_prototypes = prototypes.square().sum(axis=1, keepdims=True)  # <Nc x 1>
    pairwise_distance_square = square_queries + square_prototypes.transpose() - 2.0 * (
        F.dot(queries, prototypes.transpose()))  # <Nc*Nq x Nc>

    # Construct the labels based on the sampled clusters
    labels = F.repeat(F.arange(self.nc), self.nq)

    pred = F.log_softmax(-pairwise_distance_square, self.axis)
    loss = -F.pick(pred, labels, axis=self.axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
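# A minimal imperative sketch (shapes and values are illustrative, not from the
# source) verifying the squared-distance expansion used above:
# ||q - p||^2 = ||q||^2 + ||p||^2 - 2 * q.p, computed for all query/prototype pairs.
import mxnet as mx

queries = mx.nd.random.normal(shape=(6, 4))     # <Nc*Nq x E>
prototypes = mx.nd.random.normal(shape=(3, 4))  # <Nc x E>
sq_q = queries.square().sum(axis=1, keepdims=True)     # <6 x 1>
sq_p = prototypes.square().sum(axis=1, keepdims=True)  # <3 x 1>
d2 = sq_q + sq_p.transpose() - 2.0 * mx.nd.dot(queries, prototypes.transpose())  # <6 x 3>
# Cross-check one entry against the direct computation:
print(d2[0, 1], ((queries[0] - prototypes[1]) ** 2).sum())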
def hybrid_forward(self, F, pred, label, sample_weight=None):
    # Extract the index of the last character ('END') in each label sentence
    label = F.cast(label, dtype='float32')
    label_sent_length = F.argmax(
        F.where(label == self.end_idx, F.ones_like(label), F.zeros_like(label)), axis=1)

    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    if self._sparse_label:
        loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)  # (N, 30, val)

    # Mask the loss to zero for positions beyond each sentence's length
    loss = F.transpose(loss, (1, 0, 2))
    loss = F.SequenceMask(loss, sequence_length=label_sent_length + 1, use_sequence_length=True)
    loss = F.transpose(loss, (1, 0, 2))
    return F.sum(loss, axis=self._batch_axis, exclude=True) / (label_sent_length + 1)
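# A minimal sketch (illustrative shapes, not from the source) of the
# F.SequenceMask step above: it zeroes the loss past each sentence's valid
# length, and expects the time axis first, hence the surrounding transposes.
import mxnet as mx

loss = mx.nd.ones((2, 5, 1))   # (batch, time, 1)
lengths = mx.nd.array([3, 5])  # valid length per sample
masked = mx.nd.SequenceMask(mx.nd.transpose(loss, axes=(1, 0, 2)),
                            sequence_length=lengths,
                            use_sequence_length=True)
masked = mx.nd.transpose(masked, axes=(1, 0, 2))
print(masked[0].reshape(-1))   # [1. 1. 1. 0. 0.]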
def hybrid_forward(self, F, pred, labels, positive_proxy, negative_proxies):
    """
    :param F: mx.nd or mx.sym
    :param pred: embeddings, <BxE>
    :param labels: class labels, <B>
    :param positive_proxy: proxy of each sample's class, <BxE>
    :param negative_proxies: proxies of the other classes, <B x (C-1) x E>
    :return: loss
    """
    beta = self._beta(labels).squeeze()  # <B>
    beta_b = F.repeat(beta, repeats=self._num_classes - 1, axis=0)  # <B*(C-1)>
    beta_reg_loss = F.sum(beta) * self._nu

    positive_proxy = F.L2Normalization(positive_proxy)  # <BxE>
    pred_b = F.repeat(pred, repeats=self._num_classes - 1, axis=0)  # <B*(C-1) x E>
    negative_proxies_b = F.reshape_like(negative_proxies, pred_b)  # <B*(C-1) x E>
    negative_proxies_b = F.L2Normalization(negative_proxies_b)  # <B*(C-1) x E>

    d_ap = F.sum(F.square(positive_proxy - pred), axis=1)  # <B>
    d_ap = F.repeat(d_ap, repeats=self._num_classes - 1, axis=0)  # <B*(C-1)>
    d_an = F.sum(F.square(negative_proxies_b - pred_b), axis=1)  # <B*(C-1)>

    pos_loss = F.relu(d_ap - beta_b + self._margin)
    neg_loss = F.relu(beta_b - d_an + self._margin)

    # Average over the pairs that actually violate the margin
    pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0))
    loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt
    return _apply_weighting(F, loss, self._weight, None)
def hybrid_forward(self, F, true_posterior_arr, generated_posterior_arr, sample_weight=None):
    '''
    Forward propagation, computing losses.

    Args:
        F:                        `mxnet.ndarray` or `mxnet.symbol`.
        true_posterior_arr:       `mxnet.ndarray` or `mxnet.symbol` of the true posterior
                                  inferred by the discriminator.
        generated_posterior_arr:  `mxnet.ndarray` or `mxnet.symbol` of the fake posterior
                                  inferred by the generator.

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    loss = true_posterior_arr + F.maximum(0, self.__margin - generated_posterior_arr)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)

    # Decay the margin every `margin_decay_epoch` calls
    self.epoch += 1
    if self.epoch % self.__margin_decay_epoch == 0:
        self.__margin = self.__margin * self.__margin_decay_rate

    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None):
    label = _reshape_like(F, label, pred)
    if not self._from_sigmoid:
        if pos_weight is None:
            # We use the stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x)))
            loss = F.relu(pred) - pred * label + \
                F.Activation(-F.abs(pred), act_type='softrelu')
        else:
            # We use the stable formula: x - x * z + (1 + z * pos_weight - z) * \
            #     (log(1 + exp(-abs(x))) + max(-x, 0))
            log_weight = 1 + F.broadcast_mul(pos_weight - 1, label)
            loss = pred - pred * label + log_weight * \
                (F.Activation(-F.abs(pred), act_type='softrelu') + F.relu(-pred))
    else:
        eps = 1e-12
        if pos_weight is None:
            loss = -(F.log(pred + eps) * label
                     + F.log(1. - pred + eps) * (1. - label))
        else:
            loss = -(F.broadcast_mul(F.log(pred + eps) * label, pos_weight)
                     + F.log(1. - pred + eps) * (1. - label))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
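# A small numeric check (values are illustrative) of the stable formula above:
# max(x, 0) - x*z + log(1 + exp(-|x|)) equals
# -[z*log(sigmoid(x)) + (1 - z)*log(1 - sigmoid(x))] without overflowing for
# large |x|; 'softrelu' is log(1 + exp(.)).
import mxnet as mx

x = mx.nd.array([-10., -1., 0., 1., 10.])  # logits
z = mx.nd.array([0., 1., 1., 0., 1.])      # binary labels
stable = mx.nd.relu(x) - x * z + mx.nd.Activation(-mx.nd.abs(x), act_type='softrelu')
naive = -(mx.nd.log(mx.nd.sigmoid(x)) * z + mx.nd.log(1 - mx.nd.sigmoid(x)) * (1 - z))
print(mx.nd.abs(stable - naive).max())     # ~0 up to float error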
def hybrid_forward(self, F, pred, label, sample_weight=None):
    if not self._from_logits:
        pred = F.sigmoid(pred)
    one_hot = label > 0
    pt = F.where(one_hot, pred, 1 - pred)
    t = F.ones_like(one_hot)
    alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
    beta = (1 - pt) ** self._gamma
    if self._normalize:
        # Rescale the focal term so it sums to the element count, and track a
        # running mean of the multiplier
        t_sum = F.sum(t, axis=(-2, -1), keepdims=True)
        beta_sum = F.sum(beta, axis=(-2, -1), keepdims=True)
        mult = t_sum / (beta_sum + self._eps)
        beta = F.broadcast_mul(beta, mult)
        self._k_sum = 0.9 * self._k_sum + 0.1 * mult.asnumpy().mean()
    loss = -alpha * beta * F.log(F.minimum(pt + self._eps, 1))
    sample_weight = label != -1  # ignore entries labeled -1
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average:
        tsum = F.sum(sample_weight, axis=self._batch_axis, exclude=True)
        loss = F.sum(loss, axis=self._batch_axis, exclude=True) / (tsum + self._eps)
    else:
        loss = F.sum(loss, axis=self._batch_axis, exclude=True)
    return self._scale * loss
def hybrid_forward(self, F, pred, label, sample_weight=None): """Forward""" pred = F.log(pred) if self._sparse_label: loss = -F.pick(pred, label, axis=self._axis, keepdims=True) else: label = _reshape_like(F, label, pred) loss = -F.sum(pred*label, axis=self._axis, keepdims=True) loss = _apply_weighting(F, loss, self._weight, sample_weight) return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None):
    log_pred = F.log_softmax(pred, self._axis)
    chosen_log_pred = F.pick(log_pred, label, axis=self._axis, keepdims=True)
    chosen_pred = F.exp(chosen_log_pred)
    # Focal loss: down-weight well-classified examples by (1 - p)^gamma
    loss = -self._alpha * (1 - chosen_pred) ** self._gamma * chosen_log_pred
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
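# A minimal sketch (illustrative values) of the focal modulation above: the
# factor (1 - p)^gamma shrinks the loss of well-classified examples while
# leaving hard ones (low p for the true class) nearly untouched.
import mxnet as mx

p = mx.nd.array([0.1, 0.5, 0.9, 0.99])  # predicted prob of the true class
gamma = 2.0
ce = -mx.nd.log(p)                      # plain cross-entropy
focal = (1 - p) ** gamma * ce           # focal-weighted cross-entropy
print(ce)
print(focal)                            # confident predictions are down-weighted hardest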
def hybrid_forward(self, F, pred, label, sample_weight=None):
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    if self._sparse_label:
        loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    diceloss = self.dice_loss(F, pred, label)
    return F.mean(loss, axis=self._batch_axis, exclude=True) + diceloss
def hybrid_forward(self, F, output, label, sample_weight=None):
    if not self._from_logits:
        output = F.log_softmax(output, axis=self._axis)
    if self._sparse_label:
        valid_label_map = (label != self._ignore_label).astype('float32')
        loss = -(F.pick(output, label, axis=self._axis, keepdims=True) * valid_label_map)
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        # Rescale so the mean is taken over valid (non-ignored) entries only
        return F.mean(loss, axis=self._batch_axis, exclude=True) * \
            valid_label_map.size / F.sum(valid_label_map)
    label = _reshape_like(F, label, output)
    loss = -F.sum(output * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, anchors, positives, labels, sample_weight=None):
    """
    Computes the loss on the given data.

    :param F: mx.nd or mx.sym
    :param anchors: anchor embeddings, <BxE> where B: batch size, E: embedding dimension
    :param positives: positive embeddings with the same shape and labels as the anchors, <BxE>
    :param labels: labels of the embeddings, <B>
    :param sample_weight: weights of logits, see mx.loss
    :return: loss
    """
    # L2 regularization on the embeddings
    reg_anchor = F.mean(F.sum(anchors.square(), axis=1), axis=self._batch_axis, exclude=True)
    reg_positive = F.mean(F.sum(positives.square(), axis=1), axis=self._batch_axis, exclude=True)
    l2loss = self._l2_reg * (reg_anchor + reg_positive)

    # Get per pair similarities.
    similarity_matrix = F.dot(anchors, positives, transpose_a=False, transpose_b=True)

    # Target distribution: 1 where labels match, normalized per row
    labels = labels.expand_dims(1)
    labels_remapped = F.broadcast_equal(labels, labels.transpose())
    labels_remapped = F.broadcast_div(labels_remapped, F.sum(labels_remapped, axis=1, keepdims=True))
    labels_remapped = labels_remapped.astype(dtype='float32')

    # Add the softmax loss.
    xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
    xent_loss = _apply_weighting(F, xent_loss, self._weight, sample_weight)
    xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)
    loss = l2loss + xent_loss

    if self._symmetric:
        similarity_matrix = F.dot(positives, anchors, transpose_a=False, transpose_b=True)
        xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
        xent_loss = _apply_weighting(F, xent_loss, self._weight, sample_weight)
        xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)
        loss = (loss + l2loss + xent_loss) * 0.5
    return loss
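# A minimal sketch (illustrative labels) of the target-distribution step above:
# broadcast_equal builds a BxB match matrix, and each row is normalized to sum
# to one so it can serve as a soft target for log_softmax.
import mxnet as mx

labels = mx.nd.array([0, 1, 0]).expand_dims(1)             # <B x 1>
match = mx.nd.broadcast_equal(labels, labels.transpose())  # <B x B>
target = mx.nd.broadcast_div(match, mx.nd.sum(match, axis=1, keepdims=True))
print(target)
# [[0.5, 0. , 0.5],
#  [0. , 1. , 0. ],
#  [0.5, 0. , 0.5]]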
def hybrid_forward(self, F, output, label, sample_weight=None):
    if not self._from_logits:
        output = F.log_softmax(output)
    if self._sparse_label:
        l = -F.pick(output, label, axis=self._axis, keepdims=True)
        # Mask out samples with a negative label
        d = (label >= 0).astype('float32').reshape((-1, 1))
        loss = l * d
    else:
        loss = -F.sum(output * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def forward(self, labels, y_pred):
    labels_onehot = labels  # nd.one_hot(labels, self.num_classes)
    # Capsule margin loss: m+ = 0.9 for present classes, m- = 0.1 for absent ones
    first_term_base = nd.square(nd.maximum(0.9 - y_pred, 0))
    second_term_base = nd.square(nd.maximum(y_pred - 0.1, 0))
    margin_loss = labels_onehot * first_term_base + \
        self.lambda_value * (1 - labels_onehot) * second_term_base
    margin_loss = margin_loss.sum(axis=1)
    loss = nd.mean(margin_loss, axis=self._batch_axis, exclude=True)
    loss = _apply_weighting(nd, loss, self._weight / 2, self.sample_weight)
    return nd.mean(loss, axis=self._batch_axis, exclude=True)
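# A small numeric sketch (illustrative values; m+ = 0.9, m- = 0.1 as above) of
# the margin loss per class: present classes pay for capsule lengths below m+,
# absent classes pay for lengths above m-.
import mxnet as mx

lengths = mx.nd.array([[0.95, 0.3, 0.05]])  # capsule output lengths, one sample
onehot = mx.nd.array([[1., 0., 0.]])        # class 0 is present
lam = 0.5
present = mx.nd.square(mx.nd.maximum(0.9 - lengths, 0))
absent = mx.nd.square(mx.nd.maximum(lengths - 0.1, 0))
margin = onehot * present + lam * (1 - onehot) * absent
print(margin.sum(axis=1))  # [0.02]: 0.95 > m+, 0.05 < m-, only 0.3 is penalized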
def hybrid_forward(self, F, pred, label):
    label = _reshape_like(F, label, pred)
    # Entries equal to the ignore label get zero weight (and a dummy zero label)
    sample_weight = label != self._ignore_label
    label = F.where(sample_weight, label, F.zeros_like(label))
    if not self._from_sigmoid:
        # Stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x)))
        loss = F.relu(pred) - pred * label + \
            F.Activation(-F.abs(pred), act_type='softrelu')
    else:
        eps = 1e-12
        loss = -(F.log(pred + eps) * label + F.log(1. - pred + eps) * (1. - label))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def _mixup_forward(self, F, pred, label1, label2, lam, sample_weight=None):
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    if self._sparse_label:
        loss1 = -F.pick(pred, label1, axis=self._axis, keepdims=True)
        loss2 = -F.pick(pred, label2, axis=self._axis, keepdims=True)
        loss = lam * loss1 + (1 - lam) * loss2
    else:
        label1 = _reshape_like(F, label1, pred)
        label2 = _reshape_like(F, label2, pred)
        loss1 = -F.sum(pred * label1, axis=self._axis, keepdims=True)
        loss2 = -F.sum(pred * label2, axis=self._axis, keepdims=True)
        loss = lam * loss1 + (1 - lam) * loss2
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
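# A minimal sketch (illustrative values) of the mixup blending above: the loss
# is the lam-weighted combination of cross-entropies against the two labels
# that were mixed into each input.
import mxnet as mx

pred = mx.nd.log_softmax(mx.nd.array([[2.0, 0.5, -1.0]]), axis=-1)
label1, label2, lam = mx.nd.array([0]), mx.nd.array([1]), 0.7
loss1 = -mx.nd.pick(pred, label1, axis=-1, keepdims=True)
loss2 = -mx.nd.pick(pred, label2, axis=-1, keepdims=True)
print(lam * loss1 + (1 - lam) * loss2)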
def hybrid_forward(self, F, generated_posterior_arr, sample_weight=None):
    '''
    Forward propagation, computing losses.

    Args:
        F:                        `mxnet.ndarray` or `mxnet.symbol`.
        generated_posterior_arr:  `mxnet.ndarray` or `mxnet.symbol` of the fake posterior
                                  inferred by the generator.

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    loss = F.log(1 - generated_posterior_arr + 1e-08)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, orign_arr, dest_arr, sample_weight=None):
    '''
    Forward propagation, computing the L2 norm.

    Args:
        F:          `mxnet.ndarray` or `mxnet.symbol`.
        orign_arr:  `mxnet.ndarray` or `mxnet.symbol` of origins.
        dest_arr:   `mxnet.ndarray` or `mxnet.symbol` of destinations.

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    dest_arr = _reshape_like(F, dest_arr, orign_arr)
    loss = F.sqrt(F.mean(F.square(orign_arr - dest_arr), axis=1))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None): """Loss forward""" if not self._from_logits: pred = F.sigmoid(pred) if self._sparse_label: one_hot = F.one_hot(label, self._num_class) else: one_hot = label > 0 pt = F.where(one_hot, pred, 1 - pred) t = F.ones_like(one_hot) alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t) loss = -alpha * ((1 - pt) ** self._gamma) * F.log(F.minimum(pt + self._eps, 1)) loss = _apply_weighting(F, loss, self._weight, sample_weight) if self._size_average: return F.mean(loss, axis=self._batch_axis, exclude=True) else: return F.sum(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None): """Loss forward""" if not self._from_logits: pred = F.sigmoid(pred) if self._sparse_label: one_hot = F.one_hot(label, self._num_class) else: one_hot = label > 0 pt = F.where(one_hot, pred, 1 - pred) t = F.ones_like(one_hot) alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t) loss = -alpha * ((1 - pt) ** self._gamma) * F.log(F.minimum(pt + self._eps, 1)) loss = _apply_weighting(F, loss, self._weight, sample_weight) if self._size_average: return F.mean(loss, axis=self._batch_axis, exclude=True) else: return F.sum(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label): """Compute loss""" softmaxout = F.SoftmaxOutput( pred, label.astype(pred.dtype), ignore_label=self._ignore_label, multi_output=self._sparse_label, use_ignore=True, normalization='valid' if self._size_average else 'null') loss = -F.pick(F.log(softmaxout), label, axis=1, keepdims=True) loss = F.where( label.expand_dims(axis=1) == self._ignore_label, F.zeros_like(loss), loss) sample_weight = F.where( label.expand_dims(axis=1) == 0, F.ones_like(loss), F.ones_like(loss) * 10) loss = _apply_weighting(F, loss, 1., sample_weight) return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None): """Compute loss""" if not self._from_logits: pred = F.log_softmax(pred, axis=self._axis) if self._sparse_label: if self._size_average: valid_label_map = (label != self._ignore_label).astype('float32') loss = -F.pick(pred, label, axis=self._axis, keepdims=True) loss = F.where(label.expand_dims(axis=self._axis) == self._ignore_label, F.zeros_like(loss), loss) else: label = _reshape_like(F, label, pred) loss = -F.sum(pred*label, axis=self._axis, keepdims=True) loss = _apply_weighting(F, loss, self._weight, sample_weight) if self._size_average and self._sparse_label: return F.mean(loss, axis=self._batch_axis, exclude=True) * \ valid_size / F.sum(valid_label_map) else: return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, images, num_classes, labels, X_l2norm, lambda_value=0.5, sample_weight=None):
    self.num_classes = num_classes
    # Capsule margin loss with m+ = 0.9 and m- = 0.1; uses F throughout so the
    # method also works in symbolic mode
    labels_onehot = F.one_hot(labels, num_classes)
    first_term_base = F.square(F.maximum(0.9 - X_l2norm, 0))
    second_term_base = F.square(F.maximum(X_l2norm - 0.1, 0))
    margin_loss = labels_onehot * first_term_base + \
        lambda_value * (1 - labels_onehot) * second_term_base
    margin_loss = margin_loss.sum(axis=1)
    loss = F.mean(margin_loss, axis=self._batch_axis, exclude=True)
    loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None): """Loss forward""" if not self._from_logits: pred = F.sigmoid(pred) one_hot = F.one_hot(label, self._num_class) one_hot = F.slice_axis(one_hot, begin=1, end=None, axis=-1) pt = F.where(one_hot, pred, 1 - pred) t = F.ones_like(one_hot) alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t) loss = -alpha * ( (1 - pt)**self._gamma) * F.log(F.minimum(pt + self._eps, 1)) loss = _apply_weighting(F, loss, self._weight, sample_weight) # Method 2: # pos_part = F.power(1 - pred, self._gamma) * one_hot * \ # F.log(pred + self._eps) # neg_part = F.power(pred, self._gamma) * (1 - one_hot) * \ # F.log(1 - pred + self._eps) # loss = -F.sum(self._alpha * pos_part + (1 - self._alpha) * neg_part, axis=-1) # loss = _apply_weighting(F, loss, self._weight, sample_weight) pos_mask = (label > 0) return F.sum(loss) / F.maximum(F.sum(pos_mask), 1)
def hybrid_forward(self, F, pred, label, sample_weight=None):
    one_hot = label > 0
    t = F.ones_like(one_hot)
    if not self._from_logits:
        pred = F.sigmoid(pred)
    alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
    pt = F.where(one_hot, pred, 1 - pred)
    pt = F.where(label != self._ignore_label, pt, F.ones_like(pt))

    # Normalized focal term: rescale (1 - pt)^gamma so it sums to the element count
    beta = (1 - pt) ** self._gamma
    t_sum = F.sum(t, axis=(-2, -1), keepdims=True)
    beta_sum = F.sum(beta, axis=(-2, -1), keepdims=True)
    mult = t_sum / (beta_sum + self._eps)
    if self._detach_delimeter:
        mult = mult.detach()
    beta = F.broadcast_mul(beta, mult)

    # Track a running mean of the multiplier over samples without ignored pixels
    ignore_area = F.sum(label == -1, axis=0, exclude=True).asnumpy()
    sample_mult = F.mean(mult, axis=0, exclude=True).asnumpy()
    if np.any(ignore_area == 0):
        self._k_sum = 0.9 * self._k_sum + 0.1 * sample_mult[ignore_area == 0].mean()

    loss = -alpha * beta * F.log(F.minimum(pt + self._eps, 1))
    sample_weight = label != self._ignore_label
    loss = _apply_weighting(F, loss, self._weight, sample_weight)

    if self._size_average:
        bsum = F.sum(sample_weight, axis=self._batch_axis, exclude=True)
        loss = F.sum(loss, axis=self._batch_axis, exclude=True) / (bsum + self._eps)
    else:
        loss = F.sum(loss, axis=self._batch_axis, exclude=True)
    return self._scale * loss
def hybrid_forward(self, F, pred, label, sample_weight=None):
    if not self._from_logits:
        pred = F.sigmoid(pred)
    one_hot = label > 0
    pt = F.where(one_hot, pred, 1 - pred)
    t = label != -1
    alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
    beta = (1 - pt) ** self._gamma
    loss = -alpha * beta * F.log(F.minimum(pt + self._eps, 1))
    sample_weight = label != -1
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average:
        tsum = F.sum(label == 1, axis=self._batch_axis, exclude=True)
        loss = F.sum(loss, axis=self._batch_axis, exclude=True) / (tsum + self._eps)
    else:
        loss = F.sum(loss, axis=self._batch_axis, exclude=True)
    return self._scale * loss
def hybrid_forward(self, F, pred, label, mask, sample_weight=None):
    label = _reshape_like(F, label, pred)
    loss = F.abs(label * mask - pred * mask)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    # Normalize by the number of unmasked elements, clipped to avoid division by zero
    norm = F.sum(mask).clip(1, 1e30)
    return F.sum(loss) / norm
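# A minimal sketch (illustrative values) of the masked-L1 normalization above:
# the sum of absolute errors is divided by the mask sum, clipped to at least 1
# so an all-zero mask cannot divide by zero.
import mxnet as mx

pred = mx.nd.array([[1.0, 2.0, 3.0]])
label = mx.nd.array([[1.5, 2.0, 0.0]])
mask = mx.nd.array([[1.0, 1.0, 0.0]])  # third element is ignored
loss = mx.nd.abs(label * mask - pred * mask)
norm = mx.nd.sum(mask).clip(1, 1e30)
print(mx.nd.sum(loss) / norm)          # 0.5 / 2 = 0.25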
def hybrid_forward(self, F, pred, label, sample_weight=None):
    label = _reshape_like(F, label, pred)
    loss = F.square(pred - label)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pretext_pred_arr, pred_arr, pretext_label_arr, label_arr, sample_weight=None):
    '''
    Forward propagation, computing losses.

    Args:
        F:                  `mxnet.ndarray` or `mxnet.symbol`.
        pretext_pred_arr:   `mxnet.ndarray` or `mxnet.symbol` of predicted data in the
                            pretext, or target domain.
        pred_arr:           `mxnet.ndarray` or `mxnet.symbol` of inferred labeled feature
                            points in the source domain.
        pretext_label_arr:  `mxnet.ndarray` or `mxnet.symbol` of label data in the pretext.
        label_arr:          `mxnet.ndarray` or `mxnet.symbol` of label data in the source domain.
        sample_weight:      Element-wise weighting tensor. Must be broadcastable to the same
                            shape as label. For example, if label has shape (64, 10) and you
                            want to weigh each sample in the batch separately, sample_weight
                            should have shape (64, 1).

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    if not self._from_logits:
        if self.__log_softmax_flag is True:
            pred_arr = F.log_softmax(pred_arr, self._axis)
        else:
            # Manual softmax with max-subtraction for numerical stability
            pred_arr = pred_arr - F.reshape(F.max(pred_arr, axis=self._axis), shape=(-1, 1))
            pred_arr = F.exp(pred_arr)
            pred_arr = pred_arr / F.reshape(F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

    if self._sparse_label:
        classification_loss_arr = -F.pick(pred_arr, label_arr, axis=self._axis, keepdims=True)
    else:
        label_arr = _reshape_like(F, label_arr, pred_arr)
        classification_loss_arr = -F.sum(pred_arr * label_arr, axis=self._axis, keepdims=True)

    if self.__grad_clip_threshold > 0:
        classification_loss_norm = F.norm(classification_loss_arr)
        if classification_loss_norm.asscalar() > self.__grad_clip_threshold:
            classification_loss_arr = classification_loss_arr * self.__grad_clip_threshold / classification_loss_norm

    pretext_label_arr = _reshape_like(F, pretext_label_arr, pretext_pred_arr)
    pretext_loss_arr = -F.sum(pretext_pred_arr * pretext_label_arr, axis=self._axis, keepdims=True) / 4

    if self.__grad_clip_threshold > 0:
        pretext_loss_norm = F.norm(pretext_loss_arr)
        if pretext_loss_norm.asscalar() > self.__grad_clip_threshold:
            pretext_loss_arr = pretext_loss_arr * self.__grad_clip_threshold / pretext_loss_norm

    if self.__classification_weight is None:
        classification_loss_arr = _apply_weighting(F, classification_loss_arr, self._weight, sample_weight)
    else:
        classification_loss_arr = _apply_weighting(F, classification_loss_arr, self.__classification_weight, sample_weight)

    if self.__pretext_weight is None:
        pretext_loss_arr = _apply_weighting(F, pretext_loss_arr, self._weight, sample_weight)
    else:
        pretext_loss_arr = _apply_weighting(F, pretext_loss_arr, self.__pretext_weight, sample_weight)

    classification_loss = F.mean(classification_loss_arr, axis=self._batch_axis, exclude=True)
    pretext_loss = F.mean(pretext_loss_arr, axis=self._batch_axis, exclude=True)
    total_loss = classification_loss + pretext_loss
    return total_loss, classification_loss, pretext_loss
def hybrid_forward(self, F, pred, label, sample_weight=None): """Compute YOLOv3 losses. :param pred: (B, N, 4) :param label: (B, N, 4) :param sample_weight: :return: """ label = F.stop_gradient(label) label = gloss._reshape_like(F, label, pred) # pred = pred.reshape(-1, 4).T # label = label.reshape(-1, 4).T # pred = F.transpose(pred) # label = F.transpose(label) if self.x1y1x2y2: b1_xmin, b1_ymin, b1_xmax, b1_ymax = F.split(pred, axis=-1, num_outputs=4) b2_xmin, b2_ymin, b2_xmax, b2_ymax = F.split(label, axis=-1, num_outputs=4) else: b1_xmin, b1_ymin, b1_xmax, b1_ymax = self._center2corner(pred) b2_xmin, b2_ymin, b2_xmax, b2_ymax = self._center2corner(label) # Intersection area MAX = 1e5 inter_w = F.clip( F.elemwise_sub(F.minimum(b1_xmax, b2_xmax), F.maximum(b1_xmin, b2_xmin)), 0, MAX) inter_h = F.clip( F.elemwise_sub(F.minimum(b1_ymax, b2_ymax), F.maximum(b1_ymin, b2_ymin)), 0, MAX) # inter_w = F.where(inter_w < 0., F.zeros_like(inter_w), inter_w) # inter_h = F.where(inter_h < 0., F.zeros_like(inter_h), inter_h) inter = F.elemwise_mul(inter_w, inter_h) # Union Area w1, h1 = F.elemwise_sub(b1_xmax, b1_xmin), F.elemwise_sub(b1_ymax, b1_ymin) w2, h2 = F.elemwise_sub(b2_xmax, b2_xmin), F.elemwise_sub(b2_ymax, b2_ymin) # w1 = F.where(w1 < 0., F.zeros_like(w1), w1) # h1 = F.where(h1 < 0., F.zeros_like(h1), h1) # w2 = F.where(w2 < 0., F.zeros_like(w2), w2) # h2 = F.where(h2 < 0., F.zeros_like(h2), h2) union = F.elemwise_mul(w1, h1) + F.elemwise_mul(w2, h2) iou = F.elemwise_div(inter, union + 1e-16) # iou # From: https://github.com/ultralytics/yolov3 # GIOU cw = F.elemwise_sub( F.maximum(b1_xmax, b2_xmax), F.minimum(b1_xmin, b2_xmin)) # convex (smallest enclosing box) width ch = F.elemwise_sub(F.maximum(b1_ymax, b2_ymax), F.minimum(b1_ymin, b2_ymin)) # convex height # cw = F.where(cw < 0., F.zeros_like(cw), cw) # ch = F.where(ch < 0., F.zeros_like(ch), ch) if self.loss_type == 'giou': c_area = F.elemwise_mul(cw, ch) + 1e-16 # convex area giou = iou - (c_area - union) / c_area # GIoU loss = 1. - giou else: # convex diagonal squared c2 = cw**2 + ch**2 + 1e-16 # centerpoint distance squared rho2 = F.square((b2_xmin + b2_xmax) - (b1_xmin + b1_xmax)) / 4 + F.square( ((b2_ymin + b2_ymax) - (b1_ymin + b1_ymax))) / 4 if self.loss_type == 'diou': diou = iou - rho2 / c2 loss = 1. - diou elif self.loss_type == 'ciou': v = (4 / mx.np.pi**2) * F.power( F.arctan(w2 / (h2 + 1e-16)) - F.arctan(w1 / (h1 + 1e-16)), 2) # TODO without pause(), coverage will be faster with mx.autograd.pause(): alpha = v / (1. - iou + v + 1e-16) alpha = F.stop_gradient(alpha) ciou = iou - (rho2 / c2 + v * alpha) loss = 1. - ciou else: raise ValueError( f'unknown loss_type: {self.loss_type}, available: giou, diou, ciou' ) loss = gloss._apply_weighting(F, loss, self._weight, sample_weight) if gloss.is_np_array(): if F is mx.ndarray: return F.np.mean(loss, axis=tuple(range(1, loss.ndim))) else: return F.npx.batch_flatten(loss).mean(axis=1) else: return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, decoded_arr, pred_arr, observed_arr, label_arr, sample_weight=None):
    '''
    Forward propagation, computing losses.

    Args:
        F:             `mxnet.ndarray` or `mxnet.symbol`.
        decoded_arr:   `mxnet.ndarray` or `mxnet.symbol` of decoded feature points.
        pred_arr:      `mxnet.ndarray` or `mxnet.symbol` of inferred labeled feature points.
        observed_arr:  `mxnet.ndarray` or `mxnet.symbol` of observed data points.
        label_arr:     `mxnet.ndarray` or `mxnet.symbol` of label data.
        sample_weight: Element-wise weighting tensor. Must be broadcastable to the same
                       shape as label. For example, if label has shape (64, 10) and you
                       want to weigh each sample in the batch separately, sample_weight
                       should have shape (64, 1).

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    if not self._from_logits:
        if self.__log_softmax_flag is True:
            pred_arr = F.log_softmax(pred_arr, self._axis)
        else:
            # Manual softmax with max-subtraction for numerical stability
            pred_arr = pred_arr - F.reshape(F.max(pred_arr, axis=self._axis), shape=(-1, 1))
            pred_arr = F.exp(pred_arr)
            pred_arr = pred_arr / F.reshape(F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

    if self._sparse_label:
        classification_loss_arr = -F.pick(pred_arr, label_arr, axis=self._axis, keepdims=True)
    else:
        label_arr = _reshape_like(F, label_arr, pred_arr)
        classification_loss_arr = -F.sum(pred_arr * label_arr, axis=self._axis, keepdims=True)

    if self.__grad_clip_threshold > 0:
        classification_loss_norm = F.norm(classification_loss_arr)
        if classification_loss_norm.asscalar() > self.__grad_clip_threshold:
            classification_loss_arr = classification_loss_arr * self.__grad_clip_threshold / classification_loss_norm

    if self.__classification_weight is None:
        classification_loss_arr = _apply_weighting(F, classification_loss_arr, self._weight, sample_weight)
    else:
        classification_loss_arr = _apply_weighting(F, classification_loss_arr, self.__classification_weight, sample_weight)
    classification_loss_arr = _apply_weighting(F, classification_loss_arr, self.__rc_lambda, sample_weight)
    classification_loss = F.mean(classification_loss_arr, axis=self._batch_axis, exclude=True)

    # Reconstruction term: squared error between observed and decoded points
    observed_arr = _reshape_like(F, observed_arr, decoded_arr)
    reconstruction_loss_arr = F.square(observed_arr - decoded_arr)

    if self.__grad_clip_threshold > 0:
        reconstruction_loss_norm = F.norm(reconstruction_loss_arr)
        if reconstruction_loss_norm.asscalar() > self.__grad_clip_threshold:
            reconstruction_loss_arr = reconstruction_loss_arr * self.__grad_clip_threshold / reconstruction_loss_norm

    if self.__reconstruction_weight is None:
        reconstruction_loss_arr = _apply_weighting(F, reconstruction_loss_arr, self._weight / 2, sample_weight)
    else:
        reconstruction_loss_arr = _apply_weighting(F, reconstruction_loss_arr, self.__reconstruction_weight / 2, sample_weight)
    reconstruction_loss_arr = _apply_weighting(F, reconstruction_loss_arr, (1 - self.__rc_lambda), sample_weight)
    reconstruction_loss = F.mean(reconstruction_loss_arr, axis=self._batch_axis, exclude=True)

    return classification_loss + reconstruction_loss, classification_loss, reconstruction_loss
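# A minimal standalone sketch (illustrative values) of the norm-threshold
# rescaling used above: when the tensor's L2 norm exceeds the threshold, the
# tensor is scaled down so its norm equals the threshold.
import mxnet as mx

def clip_by_norm(x, threshold):
    norm = mx.nd.norm(x)
    if norm.asscalar() > threshold:
        x = x * threshold / norm
    return x

print(clip_by_norm(mx.nd.array([3.0, 4.0]), 1.0))  # [0.6, 0.8], norm clipped from 5 to 1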
def hybrid_forward(self, F, labels, *args, **kwargs):
    """
    Computes the loss on the given data.

    :param F: mx.nd or mx.sym
    :param labels: labels of the embeddings, <B> where B: batch size
    :param args: per-layer anchor embeddings followed by per-layer positive
                 embeddings, each <BxE> with E: embedding dimension
    :param kwargs: may contain sample_weight, weights of logits, see mx.loss
    :return: loss
    """
    block_size = len(args) // 2
    anchors = list(args[:block_size])
    positives = list(args[block_size:])

    # L2 regularization on the final-layer embeddings
    reg_anchor = F.mean(F.sum(anchors[-1].square(), axis=1), axis=self._batch_axis, exclude=True)
    reg_positive = F.mean(F.sum(positives[-1].square(), axis=1), axis=self._batch_axis, exclude=True)
    l2loss = self._l2_reg * (reg_anchor + reg_positive)

    # Per-pair perceptual similarities, summed over the intermediate layers
    perceptual_similarity_matrix = [
        self._simblocks[i](a, p).expand_dims(0)
        for i, (a, p) in enumerate(zip(anchors[:-1], positives[:-1]))
    ]
    perceptual_similarity_matrix = F.concat(*perceptual_similarity_matrix, dim=0)
    perceptual_similarity_matrix = F.sum(perceptual_similarity_matrix, axis=0)

    # Get npairs similarity matrix on the final-layer embeddings
    similarity_matrix = F.dot(anchors[-1], positives[-1], transpose_a=False, transpose_b=True)

    # Target distribution: 1 where labels match, normalized per row
    labels = labels.expand_dims(1)
    labels_remapped = F.broadcast_equal(labels, labels.transpose())
    labels_remapped = F.broadcast_div(labels_remapped, F.sum(labels_remapped, axis=1, keepdims=True))
    labels_remapped = labels_remapped.astype(dtype='float32')

    # Add the softmax loss
    xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
    xent_loss = _apply_weighting(F, xent_loss, self._weight, kwargs.get('sample_weight'))
    xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)

    # Add the perceptual softmax loss
    perc_xent_loss = F.sum(F.log_softmax(perceptual_similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
    perc_xent_loss = _apply_weighting(F, perc_xent_loss, self._weight, kwargs.get('sample_weight'))
    perc_xent_loss = F.mean(perc_xent_loss, axis=self._batch_axis, exclude=True)

    loss = (xent_loss + perc_xent_loss) * 0.5

    if self._symmetric:
        perceptual_similarity_matrix = [
            self._simblocks[i](a, p).expand_dims(0)
            for i, (a, p) in enumerate(zip(positives[:-1], anchors[:-1]))
        ]
        perceptual_similarity_matrix = F.concat(*perceptual_similarity_matrix, dim=0)
        perceptual_similarity_matrix = F.sum(perceptual_similarity_matrix, axis=0)

        similarity_matrix = F.dot(positives[-1], anchors[-1], transpose_a=False, transpose_b=True)

        xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
        xent_loss = _apply_weighting(F, xent_loss, self._weight, kwargs.get('sample_weight'))
        xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)

        perc_xent_loss = F.sum(F.log_softmax(perceptual_similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
        perc_xent_loss = _apply_weighting(F, perc_xent_loss, self._weight, kwargs.get('sample_weight'))
        perc_xent_loss = F.mean(perc_xent_loss, axis=self._batch_axis, exclude=True)

        # Average the forward and backward directions
        loss = loss + (xent_loss + perc_xent_loss) * 0.5
        loss = loss * 0.5

    return loss + l2loss