def _mixup_forward(self, F, pred, label1, label2, lam, sample_weight=None):
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    if self._sparse_label:
        loss1 = -F.pick(pred, label1, axis=self._axis, keepdims=True)
        loss2 = -F.pick(pred, label2, axis=self._axis, keepdims=True)
        loss = lam * loss1 + (1 - lam) * loss2
    else:
        label1 = _reshape_like(F, label1, pred)
        label2 = _reshape_like(F, label2, pred)
        loss1 = -F.sum(pred * label1, axis=self._axis, keepdims=True)
        loss2 = -F.sum(pred * label2, axis=self._axis, keepdims=True)
        loss = lam * loss1 + (1 - lam) * loss2
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
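# Illustration (not part of the snippet above): a minimal standalone sketch of what the
# mixup branch computes, using plain mxnet.ndarray on toy logits. The two sparse labels
# are blended with coefficient `lam`; all values here are made up for the demo.
from mxnet import nd

pred = nd.array([[2.0, 0.5, -1.0],
                 [0.1, 1.5, 0.3]])        # raw logits, shape (batch=2, classes=3)
label1 = nd.array([0, 1])                 # original sparse labels
label2 = nd.array([2, 0])                 # labels of the mixed-in samples
lam = 0.7                                 # mixup coefficient

logp = nd.log_softmax(pred, axis=-1)
loss1 = -nd.pick(logp, label1, axis=-1, keepdims=True)
loss2 = -nd.pick(logp, label2, axis=-1, keepdims=True)
mixup_loss = lam * loss1 + (1 - lam) * loss2   # per-sample mixup cross-entropy
print(mixup_loss)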
def hybrid_forward(self, F, pred, label, sample_weight=None):
    # extract the index of the last character ('END') of each label sentence
    label = F.cast(label, dtype='float32')
    label_sent_length = F.argmax(
        F.where(label == self.end_idx, F.ones_like(label), F.zeros_like(label)), axis=1)
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    if self._sparse_label:
        loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)  # (N, 30, val)
    # mask the loss to zero for positions beyond each sentence's valid length
    loss = F.transpose(loss, (1, 0, 2))
    loss = F.SequenceMask(loss, sequence_length=label_sent_length + 1, use_sequence_length=True)
    loss = F.transpose(loss, (1, 0, 2))
    return F.sum(loss, axis=self._batch_axis, exclude=True) / (label_sent_length + 1)
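# Illustration (standalone, toy shapes) of the SequenceMask step above: the per-token
# loss is transposed to (time, batch, ...), positions beyond each sentence's valid
# length are zeroed, and the result is transposed back to (batch, time, ...).
from mxnet import nd

tok_loss = nd.ones((2, 4, 1))                    # (batch=2, time=4, 1) toy per-token loss
valid_len = nd.array([2, 3])                     # valid length of each sample
masked = nd.transpose(tok_loss, (1, 0, 2))       # -> (time, batch, 1)
masked = nd.SequenceMask(masked, sequence_length=valid_len, use_sequence_length=True)
masked = nd.transpose(masked, (1, 0, 2))         # -> (batch, time, 1)
print(masked.squeeze(axis=-1))                   # rows: [1, 1, 0, 0] and [1, 1, 1, 0]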
def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None):
    label = _reshape_like(F, label, pred)
    if not self._from_sigmoid:
        if pos_weight is None:
            # We use the stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x)))
            loss = F.relu(pred) - pred * label + \
                F.Activation(-F.abs(pred), act_type='softrelu')
        else:
            # We use the stable formula: x - x * z + (1 + z * pos_weight - z) * \
            #     (log(1 + exp(-abs(x))) + max(-x, 0))
            log_weight = 1 + F.broadcast_mul(pos_weight - 1, label)
            loss = pred - pred * label + log_weight * \
                (F.Activation(-F.abs(pred), act_type='softrelu') + F.relu(-pred))
    else:
        eps = 1e-12
        if pos_weight is None:
            loss = -(F.log(pred + eps) * label
                     + F.log(1. - pred + eps) * (1. - label))
        else:
            loss = -(F.broadcast_mul(F.log(pred + eps) * label, pos_weight)
                     + F.log(1. - pred + eps) * (1. - label))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
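# Illustration (standalone, toy values) of why the stable form above is used:
# max(x, 0) - x*z + log(1 + exp(-|x|)) equals -(z*log(sigmoid(x)) + (1-z)*log(1-sigmoid(x)))
# but never exponentiates a large positive number.
from mxnet import nd

x = nd.array([-3.0, -0.5, 0.0, 2.0, 8.0])    # logits
z = nd.array([0.0, 1.0, 1.0, 0.0, 1.0])      # binary targets

stable = nd.relu(x) - x * z + nd.Activation(-nd.abs(x), act_type='softrelu')
naive = -(z * nd.log(nd.sigmoid(x)) + (1 - z) * nd.log(1 - nd.sigmoid(x)))
print(stable)
print(naive)   # matches `stable` up to floating-point error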
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices."""
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
    # cross device reduction to obtain positive samples in entire batch
    num_pos = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pos_samples = (ct > 0)
        num_pos.append(pos_samples.sum())
    num_pos_all = sum([p.asscalar() for p in num_pos])
    if num_pos_all < 1:
        # no positive samples found, return dummy losses
        return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])
    return sum_losses, cls_losses, box_losses
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Forward"""
    pred = F.log(pred)
    if self._sparse_label:
        loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices."""
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
    # cross device reduction to obtain positive samples in entire batch
    pos_ct = [ct > 0 for ct in cls_target]
    num_pos = [ct.sum() for ct in pos_ct]
    num_pos_all = sum([p.asscalar() for p in num_pos])
    # print('num_pos_all: {}'.format(num_pos_all))
    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # no positive samples and no hard negatives, return dummy losses
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        # print('cp shape: {}'.format(cp.shape))
        # print('bp shape: {}'.format(bp.shape))
        # print('ct shape: {}'.format(ct.shape))
        # print('bt shape: {}'.format(bt.shape))
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])
    return sum_losses, cls_losses, box_losses
def hybrid_forward(self, F, pred, label, sample_weight=None):
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    if self._sparse_label:
        loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    diceloss = self.dice_loss(F, pred, label)
    return F.mean(loss, axis=self._batch_axis, exclude=True) + diceloss
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices."""
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
    # cross device reduction to obtain positive samples in entire batch
    num_pos = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pos_samples = (ct > 0)
        num_pos.append(pos_samples.sum())
    num_pos_all = sum([p.asscalar() for p in num_pos])

    # synchronize across different machines
    # print('before sync:', num_pos_all)
    if self._distributed:
        num_pos_out = nd.zeros(1, mx.cpu())
        num_pos_in = nd.zeros(1, mx.cpu()) + num_pos_all
        # allreduce only supports pushpull
        if 'allreduce' in self._kv_store_type:
            self._kv_store.pushpull(self._num_pos_key, num_pos_in, num_pos_out)
        else:
            self._kv_store.push(self._num_pos_key, num_pos_in)
            # self._kv_store._barrier()
            self._kv_store.pull(self._num_pos_key, out=num_pos_out)
        num_pos_all = num_pos_out.asscalar()
    # print('after sync:', num_pos_all)

    if num_pos_all < 1:
        # no positive samples found, return dummy losses
        return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])
    return sum_losses, cls_losses, box_losses
def hybrid_forward(self, F, pred, label, sample_weight=None):
    label = _reshape_like(F, label, pred)
    if not self._from_sigmoid:
        max_val = F.relu(-pred)
        loss = pred - pred * label + max_val + \
            F.log(F.exp(-max_val) + F.exp(-pred - max_val))
    else:
        p = F.sigmoid(pred)
        # `batch_ratios` (a per-class positive/negative ratio) is not defined in this
        # snippet; it is assumed to be supplied by the enclosing scope.
        weights = F.exp(label + (1 - label * 2) * batch_ratios)
        gamma = 2
        # focal-style modulating factors for the positive and negative terms
        w_p, w_n = F.power(1. - p, gamma), F.power(p, gamma)
        loss = -(w_p * F.log(p + 1e-12) * label
                 + w_n * F.log(1. - p + 1e-12) * (1. - label))
        loss = loss * weights
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label):
    """Compute loss"""
    softmaxout = F.SoftmaxOutput(
        pred, label.astype(pred.dtype),
        ignore_label=self._ignore_label,
        multi_output=self._sparse_label,
        use_ignore=True,
        normalization='valid' if self._size_average else 'null')
    if self._sparse_label:
        loss = -F.pick(F.log(softmaxout), label, axis=1, keepdims=True)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(F.log(softmaxout) * label, axis=-1, keepdims=True)
    loss = F.where(label.expand_dims(axis=1) == self._ignore_label,
                   F.zeros_like(loss), loss)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
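# Illustration (standalone, simplified shapes) of the ignore-label masking above:
# positions whose label equals the ignore index are zeroed with F.where so they do not
# contribute to the averaged loss. The ignore index (-1) and values are made up.
from mxnet import nd

ignore_label = -1
label = nd.array([[0, 2, -1, 1]])               # -1 marks padded / ignored positions
ce_loss = nd.array([[0.3, 1.2, 9.9, 0.7]])      # toy per-position cross-entropy
ce_loss = nd.where(label == ignore_label, nd.zeros_like(ce_loss), ce_loss)
print(ce_loss)                                  # [[0.3, 1.2, 0.0, 0.7]]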
def hybrid_forward(self, F, pred, label):
    label = _reshape_like(F, label, pred)
    sample_weight = label != self._ignore_label
    label = F.where(sample_weight, label, F.zeros_like(label))
    if not self._from_sigmoid:
        loss = F.relu(pred) - pred * label + \
            F.Activation(-F.abs(pred), act_type='softrelu')
    else:
        eps = 1e-12
        loss = -(F.log(pred + eps) * label
                 + F.log(1. - pred + eps) * (1. - label))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, sample_weight=None):
    label = _reshape_like(F, label, pred)
    if not self._from_sigmoid:
        max_val = F.relu(-pred)
        loss = pred - pred * label + max_val + \
            F.log(F.exp(-max_val) + F.exp(-pred - max_val))
    else:
        p = F.sigmoid(pred)
        # `epoch`, `history_track`, `args`, `batch_id`, `prediction_history` and `ctx`
        # are not defined in this snippet; they are assumed to be module-level state
        # maintained by the surrounding training script.
        if epoch >= history_track and not args.test:
            # weight each sample by the spread of its recent predictions
            p_hist = prediction_history[:, batch_id * args.batch_size:
                                        (batch_id + 1) * args.batch_size, :]
            p_std = (np.var(p_hist, axis=0)
                     + (np.var(p_hist, axis=0) ** 2) / (p_hist.shape[0] - 1)) ** .5
            std_weights = nd.array(1 + p_std, ctx=ctx)
            loss = -std_weights * (F.log(p + 1e-12) * label
                                   + F.log(1. - p + 1e-12) * (1. - label))
        else:
            loss = -(F.log(p + 1e-12) * label
                     + F.log(1. - p + 1e-12) * (1. - label))
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, orign_arr, dest_arr, sample_weight=None):
    '''
    Forward propagation, computing L2 norm.

    Args:
        F:          `mxnet.ndarray` or `mxnet.symbol`.
        orign_arr:  `mxnet.ndarray` or `mxnet.symbol` of origins.
        dest_arr:   `mxnet.ndarray` or `mxnet.symbol` of destinations.

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    dest_arr = _reshape_like(F, dest_arr, orign_arr)
    loss = F.sqrt(F.mean(F.square(orign_arr - dest_arr), axis=1))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, images, cls_target, box_target, r, idx_r):
    if self.bulk_last_wgrad:
        # make the last wgrad use the copy of the input so it joins the bulk
        images = F.identity(images)
    cls_pred, box_pred = self.net(images)

    # loss needs to be done in FP32
    cls_pred = cls_pred.astype(dtype='float32')
    box_pred = box_pred.astype(dtype='float32')

    pred = F.log_softmax(cls_pred, axis=-1)
    pos = cls_target > 0
    pos_num = pos.sum(axis=1)
    cls_loss = -F.pick(pred, cls_target, axis=-1, keepdims=False)
    idx = (cls_loss * (pos - 1)).argsort(axis=1)
    # use scatter_nd to save one argsort
    idx_c = idx.reshape((1, -1)).squeeze(axis=0)  # column indices
    idx = F.stack(idx_r, idx_c)
    rank = F.scatter_nd(r, idx, (self.s0, self.s1))
    hard_negative = F.broadcast_lesser(
        rank,
        F.maximum(self._min_hard_negatives,
                  pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1))
    # mask out if not positive or negative
    cls_loss = F.where((pos + hard_negative) > 0, cls_loss, F.zeros_like(cls_loss))
    cls_loss = F.sum(cls_loss, axis=0, exclude=True)

    box_pred = _reshape_like(F, box_pred, box_target)
    box_loss = F.abs(box_pred - box_target)
    box_loss = F.smooth_l1(data=box_loss, scalar=1.0)
    # box loss only apply to positive samples
    box_loss = F.broadcast_mul(box_loss, pos.expand_dims(axis=-1))
    box_loss = F.sum(box_loss, axis=0, exclude=True)

    # normalize loss with num_pos_per_image
    # see https://github.com/mlperf/training/blob/master/single_stage_detector/ssd/base_model.py#L201-L204
    num_mask = (pos_num > 0).astype('float32')
    pos_num = pos_num.astype('float32').clip(a_min=1e-6, a_max=8732)
    sum_loss = (num_mask * (cls_loss + self._lambd * box_loss) / pos_num).mean(axis=0)
    return sum_loss
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices."""
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
    # cross device reduction to obtain positive samples in entire batch
    num_pos = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pos_samples = (ct > 0)
        num_pos.append(pos_samples.sum())
    num_pos_all = sum([p.asscalar() for p in num_pos])
    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # no positive samples and no hard negatives, return dummy losses
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])
    return sum_losses, cls_losses, box_losses
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Loss forward"""
    if not self._from_logits:
        pred = F.sigmoid(pred)
    if self._sparse_label:
        one_hot = F.one_hot(label, self._num_class)
        one_hot = _reshape_like(F, one_hot, pred)
    else:
        one_hot = label > 0
    pt = F.where(one_hot, pred, 1 - pred)
    t = F.ones_like(one_hot)
    alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
    loss = -alpha * ((1 - pt) ** self._gamma) * F.log(F.minimum(pt + self._eps, 1))
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average:
        return F.mean(loss, axis=self._batch_axis, exclude=True)
    else:
        return F.sum(loss, axis=self._batch_axis, exclude=True)
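# Illustration (standalone, toy values; alpha and gamma chosen only for the demo) of the
# focal term above: pt is the probability assigned to the true class, (1 - pt)^gamma
# down-weights already well-classified positions, and alpha balances the two classes.
from mxnet import nd

alpha, gamma, eps = 0.25, 2.0, 1e-12
prob = nd.sigmoid(nd.array([[2.0, -1.0, 0.5]]))          # per-class probabilities
one_hot = nd.one_hot(nd.array([0]), 3)                   # sparse label 0 -> one-hot
pt = nd.where(one_hot, prob, 1 - prob)
ones = nd.ones_like(one_hot)
alpha_t = nd.where(one_hot, alpha * ones, (1 - alpha) * ones)
focal = -alpha_t * ((1 - pt) ** gamma) * nd.log(nd.minimum(pt + eps, 1))
print(focal)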
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Compute loss"""
    if not self._from_logits:
        pred = F.log_softmax(pred, axis=self._axis)
    if self._sparse_label:
        if self._size_average:
            valid_label_map = (label != self._ignore_label).astype('float32')
        loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        loss = F.where(label.expand_dims(axis=self._axis) == self._ignore_label,
                       F.zeros_like(loss), loss)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average and self._sparse_label:
        # rescale so the average is taken over valid (non-ignored) entries only;
        # `valid_size` is not defined in the original snippet and is assumed here to be
        # the total number of label entries
        valid_size = valid_label_map.size
        return F.mean(loss, axis=self._batch_axis, exclude=True) * \
            valid_size / F.sum(valid_label_map)
    else:
        return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pred, label, mask, sample_weight=None):
    label = _reshape_like(F, label, pred)
    loss = F.abs(label * mask - pred * mask)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    norm = F.sum(mask).clip(1, 1e30)
    return F.sum(loss) / norm
def forward(self, cls_pred, box_pred, cls_target, box_target):
    """Compute loss in entire batch across devices.

    Parameters
    ----------
    cls_pred : mxnet.nd.NDArray
        Predicted classes.
    box_pred : mxnet.nd.NDArray
        Predicted bounding-boxes.
    cls_target : mxnet.nd.NDArray
        Ground-truth classes.
    box_target : mxnet.nd.NDArray
        Ground-truth bounding-boxes.

    Returns
    -------
    tuple of NDArrays
        sum_losses : array containing the sum of class prediction and
        bounding-box regression loss.
        cls_losses : array of class prediction loss.
        box_losses : array of box regression L1 loss.
    """
    # require results across different devices at this time
    cls_pred, box_pred, cls_target, box_target = [
        _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
    # cross device reduction to obtain positive samples in entire batch
    num_pos = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pos_samples = (ct > 0)
        num_pos.append(pos_samples.sum())
    num_pos_all = sum([p.asscalar() for p in num_pos])
    if num_pos_all < 1 and self._min_hard_negatives < 1:
        # no positive samples and no hard negatives, return dummy losses
        cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
        box_losses = [nd.sum(bp * 0) for bp in box_pred]
        sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0)
                      for cp, bp in zip(cls_pred, box_pred)]
        return sum_losses, cls_losses, box_losses

    # compute element-wise cross entropy loss and sort, then perform negative mining
    cls_losses = []
    box_losses = []
    sum_losses = []
    for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
        pred = nd.log_softmax(cp, axis=-1)
        pos = ct > 0
        cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
        rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
        hard_negative = rank < nd.maximum(
            self._min_hard_negatives,
            pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
        # mask out if not positive or negative
        cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
        cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

        bp = _reshape_like(nd, bp, bt)
        box_loss = nd.abs(bp - bt)
        box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                            (0.5 / self._rho) * nd.square(box_loss))
        # box loss only apply to positive samples
        box_loss = box_loss * pos.expand_dims(axis=-1)
        box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
        sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])
    return sum_losses, cls_losses, box_losses
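# Illustration (standalone, toy batch of one image with 5 anchors) of the hard-negative
# mining used in the SSD losses above: multiplying the per-anchor loss by (pos - 1)
# keeps only the negatives (negated), a double argsort turns that into per-anchor ranks,
# and ranks below ratio * num_positives select the hardest negatives.
from mxnet import nd

negative_mining_ratio = 3
cls_loss = nd.array([[0.1, 2.0, 0.5, 1.5, 0.3]])   # per-anchor cross-entropy loss
pos = nd.array([[1, 0, 0, 0, 0]])                  # anchor 0 is the only positive

rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
hard_negative = rank < (pos.sum(axis=1) * negative_mining_ratio).expand_dims(-1)
print(hard_negative)   # [[0, 1, 1, 1, 0]]: the 3 highest-loss negatives are kept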
def hybrid_forward(self, F, pred, label, sample_weight=None):
    label = _reshape_like(F, label, pred)
    loss = F.square(pred - label)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def hybrid_forward(self, F, pretext_pred_arr, pred_arr, pretext_label_arr, label_arr,
                   sample_weight=None):
    '''
    Forward propagation, computing losses.

    Args:
        F:                  `mxnet.ndarray` or `mxnet.symbol`.
        pretext_pred_arr:   `mxnet.ndarray` or `mxnet.symbol` of predicted data in
                            pretext, or target domain.
        pred_arr:           `mxnet.ndarray` or `mxnet.symbol` of inferenced labeled
                            feature points in source domain.
        pretext_label_arr:  `mxnet.ndarray` or `mxnet.symbol` of label data in pretext.
        label_arr:          `mxnet.ndarray` or `mxnet.symbol` of label data in source domain.
        sample_weight:      element-wise weighting tensor. Must be broadcastable to the
                            same shape as label. For example, if label has shape (64, 10)
                            and you want to weigh each sample in the batch separately,
                            sample_weight should have shape (64, 1).

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    if not self._from_logits:
        if self.__log_softmax_flag is True:
            pred_arr = F.log_softmax(pred_arr, self._axis)
        else:
            pred_arr = pred_arr - F.reshape(F.max(pred_arr, axis=self._axis), shape=(-1, 1))
            pred_arr = F.exp(pred_arr)
            pred_arr = pred_arr / F.reshape(F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

    if self._sparse_label:
        classification_loss_arr = -F.pick(pred_arr, label_arr, axis=self._axis, keepdims=True)
    else:
        label_arr = _reshape_like(F, label_arr, pred_arr)
        classification_loss_arr = -F.sum(pred_arr * label_arr, axis=self._axis, keepdims=True)

    if self.__grad_clip_threshold > 0:
        classification_loss_norm = F.norm(classification_loss_arr)
        if classification_loss_norm.asscalar() > self.__grad_clip_threshold:
            classification_loss_arr = classification_loss_arr * \
                self.__grad_clip_threshold / classification_loss_norm

    pretext_label_arr = _reshape_like(F, pretext_label_arr, pretext_pred_arr)
    pretext_loss_arr = -F.sum(pretext_pred_arr * pretext_label_arr,
                              axis=self._axis, keepdims=True) / 4

    if self.__grad_clip_threshold > 0:
        pretext_loss_norm = F.norm(pretext_loss_arr)
        if pretext_loss_norm.asscalar() > self.__grad_clip_threshold:
            pretext_loss_arr = pretext_loss_arr * self.__grad_clip_threshold / pretext_loss_norm

    if self.__classification_weight is None:
        classification_loss_arr = _apply_weighting(
            F, classification_loss_arr, self._weight, sample_weight)
    else:
        classification_loss_arr = _apply_weighting(
            F, classification_loss_arr, self.__classification_weight, sample_weight)

    if self.__pretext_weight is None:
        pretext_loss_arr = _apply_weighting(F, pretext_loss_arr, self._weight, sample_weight)
    else:
        pretext_loss_arr = _apply_weighting(
            F, pretext_loss_arr, self.__pretext_weight, sample_weight)

    classification_loss = F.mean(classification_loss_arr, axis=self._batch_axis, exclude=True)
    pretext_loss = F.mean(pretext_loss_arr, axis=self._batch_axis, exclude=True)
    total_loss = classification_loss + pretext_loss
    return total_loss, classification_loss, pretext_loss
def hybrid_forward(self, F, decoded_arr, pred_arr, observed_arr, label_arr, sample_weight=None):
    '''
    Forward propagation, computing losses.

    Args:
        F:              `mxnet.ndarray` or `mxnet.symbol`.
        decoded_arr:    `mxnet.ndarray` or `mxnet.symbol` of decoded feature points.
        pred_arr:       `mxnet.ndarray` or `mxnet.symbol` of inferenced labeled feature points.
        observed_arr:   `mxnet.ndarray` or `mxnet.symbol` of observed data points.
        label_arr:      `mxnet.ndarray` or `mxnet.symbol` of label data.
        sample_weight:  element-wise weighting tensor. Must be broadcastable to the same
                        shape as label. For example, if label has shape (64, 10) and you
                        want to weigh each sample in the batch separately, sample_weight
                        should have shape (64, 1).

    Returns:
        `mxnet.ndarray` or `mxnet.symbol` of loss.
    '''
    if not self._from_logits:
        if self.__log_softmax_flag is True:
            pred_arr = F.log_softmax(pred_arr, self._axis)
        else:
            pred_arr = pred_arr - F.reshape(F.max(pred_arr, axis=self._axis), shape=(-1, 1))
            pred_arr = F.exp(pred_arr)
            pred_arr = pred_arr / F.reshape(F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

    if self._sparse_label:
        classification_loss_arr = -F.pick(pred_arr, label_arr, axis=self._axis, keepdims=True)
    else:
        label_arr = _reshape_like(F, label_arr, pred_arr)
        classification_loss_arr = -F.sum(pred_arr * label_arr, axis=self._axis, keepdims=True)

    if self.__grad_clip_threshold > 0:
        classification_loss_norm = F.norm(classification_loss_arr)
        if classification_loss_norm.asscalar() > self.__grad_clip_threshold:
            classification_loss_arr = classification_loss_arr * \
                self.__grad_clip_threshold / classification_loss_norm

    if self.__classification_weight is None:
        classification_loss_arr = _apply_weighting(
            F, classification_loss_arr, self._weight, sample_weight)
    else:
        classification_loss_arr = _apply_weighting(
            F, classification_loss_arr, self.__classification_weight, sample_weight)
    classification_loss_arr = _apply_weighting(
        F, classification_loss_arr, self.__rc_lambda, sample_weight)
    classification_loss = F.mean(classification_loss_arr, axis=self._batch_axis, exclude=True)

    observed_arr = _reshape_like(F, observed_arr, decoded_arr)
    reconstruction_loss_arr = F.square(observed_arr - decoded_arr)

    if self.__grad_clip_threshold > 0:
        reconstruction_loss_norm = F.norm(reconstruction_loss_arr)
        if reconstruction_loss_norm.asscalar() > self.__grad_clip_threshold:
            reconstruction_loss_arr = reconstruction_loss_arr * \
                self.__grad_clip_threshold / reconstruction_loss_norm

    if self.__reconstruction_weight is None:
        reconstruction_loss_arr = _apply_weighting(
            F, reconstruction_loss_arr, self._weight / 2, sample_weight)
    else:
        reconstruction_loss_arr = _apply_weighting(
            F, reconstruction_loss_arr, self.__reconstruction_weight / 2, sample_weight)
    reconstruction_loss_arr = _apply_weighting(
        F, reconstruction_loss_arr, (1 - self.__rc_lambda), sample_weight)
    reconstruction_loss = F.mean(reconstruction_loss_arr, axis=self._batch_axis, exclude=True)

    return classification_loss + reconstruction_loss, classification_loss, reconstruction_loss
def hybrid_forward(self, F, pred, label, sample_weight=None):
    """Compute YOLOv3 losses.

    :param pred: (B, N, 4)
    :param label: (B, N, 4)
    :param sample_weight:
    :return:
    """
    label = F.stop_gradient(label)
    label = gloss._reshape_like(F, label, pred)
    if self.x1y1x2y2:
        b1_xmin, b1_ymin, b1_xmax, b1_ymax = F.split(pred, axis=-1, num_outputs=4)
        b2_xmin, b2_ymin, b2_xmax, b2_ymax = F.split(label, axis=-1, num_outputs=4)
    else:
        b1_xmin, b1_ymin, b1_xmax, b1_ymax = self._center2corner(pred)
        b2_xmin, b2_ymin, b2_xmax, b2_ymax = self._center2corner(label)

    # Intersection area
    MAX = 1e5
    inter_w = F.clip(F.elemwise_sub(F.minimum(b1_xmax, b2_xmax),
                                    F.maximum(b1_xmin, b2_xmin)), 0, MAX)
    inter_h = F.clip(F.elemwise_sub(F.minimum(b1_ymax, b2_ymax),
                                    F.maximum(b1_ymin, b2_ymin)), 0, MAX)
    inter = F.elemwise_mul(inter_w, inter_h)

    # Union area (subtract the intersection so the overlap is not counted twice)
    w1, h1 = F.elemwise_sub(b1_xmax, b1_xmin), F.elemwise_sub(b1_ymax, b1_ymin)
    w2, h2 = F.elemwise_sub(b2_xmax, b2_xmin), F.elemwise_sub(b2_ymax, b2_ymin)
    union = F.elemwise_mul(w1, h1) + F.elemwise_mul(w2, h2) - inter

    iou = F.elemwise_div(inter, union + 1e-16)  # iou

    # From: https://github.com/ultralytics/yolov3
    # convex (smallest enclosing box) width and height
    cw = F.elemwise_sub(F.maximum(b1_xmax, b2_xmax), F.minimum(b1_xmin, b2_xmin))
    ch = F.elemwise_sub(F.maximum(b1_ymax, b2_ymax), F.minimum(b1_ymin, b2_ymin))

    if self.loss_type == 'giou':
        c_area = F.elemwise_mul(cw, ch) + 1e-16  # convex area
        giou = iou - (c_area - union) / c_area   # GIoU
        loss = 1. - giou
    else:
        # convex diagonal squared
        c2 = cw ** 2 + ch ** 2 + 1e-16
        # centerpoint distance squared
        rho2 = F.square((b2_xmin + b2_xmax) - (b1_xmin + b1_xmax)) / 4 + \
            F.square((b2_ymin + b2_ymax) - (b1_ymin + b1_ymax)) / 4
        if self.loss_type == 'diou':
            diou = iou - rho2 / c2
            loss = 1. - diou
        elif self.loss_type == 'ciou':
            v = (4 / mx.np.pi ** 2) * F.power(
                F.arctan(w2 / (h2 + 1e-16)) - F.arctan(w1 / (h1 + 1e-16)), 2)
            # TODO without pause(), coverage will be faster
            with mx.autograd.pause():
                alpha = v / (1. - iou + v + 1e-16)
                alpha = F.stop_gradient(alpha)
            ciou = iou - (rho2 / c2 + v * alpha)
            loss = 1. - ciou
        else:
            raise ValueError(
                f'unknown loss_type: {self.loss_type}, available: giou, diou, ciou')

    loss = gloss._apply_weighting(F, loss, self._weight, sample_weight)
    if gloss.is_np_array():
        if F is mx.ndarray:
            return F.np.mean(loss, axis=tuple(range(1, loss.ndim)))
        else:
            return F.npx.batch_flatten(loss).mean(axis=1)
    else:
        return F.mean(loss, axis=self._batch_axis, exclude=True)
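# Illustration (standalone, toy corner-format boxes) of the GIoU branch above:
# giou = iou - (enclosing_area - union) / enclosing_area, and the loss is 1 - giou.
from mxnet import nd

b1 = nd.array([[0.0, 0.0, 2.0, 2.0]])    # predicted box (xmin, ymin, xmax, ymax)
b2 = nd.array([[1.0, 1.0, 3.0, 3.0]])    # target box

b1_xmin, b1_ymin, b1_xmax, b1_ymax = nd.split(b1, axis=-1, num_outputs=4)
b2_xmin, b2_ymin, b2_xmax, b2_ymax = nd.split(b2, axis=-1, num_outputs=4)

inter_w = nd.clip(nd.minimum(b1_xmax, b2_xmax) - nd.maximum(b1_xmin, b2_xmin), 0, 1e5)
inter_h = nd.clip(nd.minimum(b1_ymax, b2_ymax) - nd.maximum(b1_ymin, b2_ymin), 0, 1e5)
inter = inter_w * inter_h                                          # 1.0
union = (b1_xmax - b1_xmin) * (b1_ymax - b1_ymin) + \
        (b2_xmax - b2_xmin) * (b2_ymax - b2_ymin) - inter          # 7.0
iou = inter / (union + 1e-16)                                      # ~0.143

cw = nd.maximum(b1_xmax, b2_xmax) - nd.minimum(b1_xmin, b2_xmin)   # enclosing width  3.0
ch = nd.maximum(b1_ymax, b2_ymax) - nd.minimum(b1_ymin, b2_ymin)   # enclosing height 3.0
c_area = cw * ch + 1e-16                                           # 9.0
giou = iou - (c_area - union) / c_area                             # ~ -0.079
print(1. - giou)                                                   # GIoU loss ~ 1.079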