def hybrid_forward(self, F, *x):
    """Turn per-level feature maps into a ranked list of region proposals.

    Parameters
    ----------
    F : module
        ``mxnet.ndarray`` or ``mxnet.symbol``, as passed by the HybridBlock.
    *x :
        One feature map per level; level ``i`` is paired with
        ``self.anchor_generator[i]``.

    Returns
    -------
    tuple
        ``(rpn_scores, rpn_boxes)`` outside training.  In training mode
        ``(rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors)``,
        where ``anchors`` is a Python list with one entry per level.
    """
    training = autograd.is_training()
    pre_nms = self.train_pre_nms if training else self.test_pre_nms
    post_nms = self.train_post_nms if training else self.test_post_nms

    # Column layout shared by every box_nms call: [score, x1, y1, x2, y2].
    nms_layout = dict(coord_start=1, score_index=0, id_index=-1)
    num_levels = len(x)

    anchors = []
    level_proposals = []
    level_raw_scores = []
    level_raw_boxes = []
    for level, feat in enumerate(x):
        # raw_rpn_score (B, HWN, 1), raw_rpn_box (B, HWN, 4)
        rpn_score, rpn_box, raw_rpn_score, raw_rpn_box = self.head(feat)
        level_raw_scores.append(raw_rpn_score)
        level_raw_boxes.append(raw_rpn_box)
        # Proposal decoding is not differentiated.
        with autograd.pause():
            anchor = self.anchor_generator[level](feat)
            anchor = anchor.reshape((-1, 4))
            anchors.append(anchor)
            rpn_box = self.box_decoder(rpn_box, anchor)
            rpn_box = self.box_clip(rpn_box)
            rpn_pre = F.concat(rpn_score, rpn_box, dim=-1)
            if self.per_level_nms:
                # Each level gets an equal share of the pre-NMS budget.
                rpn_pre = F.contrib.box_nms(
                    rpn_pre,
                    overlap_thresh=self.nms_thresh,
                    topk=pre_nms // num_levels,
                    **nms_layout)
            level_proposals.append(rpn_pre)

    rpn_pre_nms_proposals = F.concat(*level_proposals, dim=1)
    raw_rpn_scores = F.concat(*level_raw_scores, dim=1)
    raw_rpn_boxes = F.concat(*level_raw_boxes, dim=1)

    with autograd.pause():
        if self.per_level_nms:
            # NMS already ran per level; overlap_thresh=2 can never be
            # exceeded, so this call only sorts the proposals by score.
            ranked = F.contrib.box_nms(
                rpn_pre_nms_proposals,
                overlap_thresh=2,
                topk=pre_nms + 1,
                **nms_layout)
        else:
            ranked = F.contrib.box_nms(
                rpn_pre_nms_proposals,
                overlap_thresh=self.nms_thresh,
                topk=pre_nms,
                **nms_layout)
        kept = F.slice_axis(ranked, axis=1, begin=0, end=post_nms)
        rpn_scores = F.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_boxes = F.slice_axis(kept, axis=-1, begin=1, end=None)

    if not training:
        return rpn_scores, rpn_boxes
    return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
def hybrid_forward(self, F, x, basis=None, level_codes=None, thrs_multiplier=None):
    """Quantize ``x`` onto levels derived from ``basis``; refresh the basis in training.

    Levels are ``dot(level_codes, basis)`` sorted ascending, thresholds are
    ``dot(thrs_multiplier, levels)``, and every element of ``x`` is mapped to
    the level that follows the highest threshold it exceeds.  In training
    mode a new basis is solved from ``inv(B.B^T + EPS*I) . (B.x)`` and blended
    into the stored one with ``MOVING_AVERAGES_FACTOR``.

    NOTE(review): the extents of the ``with``/``if`` blocks below were
    reconstructed from a layout that had lost its indentation -- confirm
    against the original file.

    Parameters
    ----------
    F : module
        ``mxnet.ndarray`` or ``mxnet.symbol``.
    x : tensor
        Input to quantize.
    basis, level_codes, thrs_multiplier : tensor, optional
        Quantizer parameters.  When ``basis`` is None the input is returned
        untouched.

    Returns
    -------
    tensor
        ``x`` itself when ``basis`` is None; otherwise the quantized values,
        built so that the gradient is that of ``minimum(x, top level)``.
    """
    if basis is None:
        return x
    # calculate levels and sort
    with autograd.pause():
        levels = F.dot(level_codes, basis)
        levels, sort_id = F.topk(F.transpose(levels), ret_typ='both', k=self.num_levels, is_ascend=1)  # ascend
        levels = F.transpose(levels)  # TODO: levels need backward
        sort_id = F.transpose(sort_id)
    # calculate threshold
    thrs = F.dot(thrs_multiplier, levels)
    # calculate output y and its binary code
    y = F.zeros_like(x)  # output
    reshape_x = F.reshape(x, [-1])
    BT = F.zeros_like(reshape_x)
    BT = F.reshape(F.repeat(BT, self.nbits), shape=(-1, self.nbits))  # (N, k)
    zero_y = F.zeros_like(x)
    zero_bits_y = F.zeros_like(BT)
    # Walk the thresholds upwards: anything above thrs[i] is promoted to
    # level i + 1 and its row of BT is set to that level's bit code.
    for i in range(self.num_levels - 1):
        g = F.broadcast_greater(x, thrs[i])  # module 'mxnet.symbol' has no attribute 'greater'
        y = F.where(g, zero_y + levels[i + 1], y)
        BT = F.where(F.reshape(g, [-1]), zero_bits_y + level_codes[sort_id[i + 1][0]], BT)
    if autograd.is_training():
        with autograd.pause():
            # calculate BxBT
            # NOTE(review): the element-wise assignments below presumably
            # require NDArray (non-hybridized) mode -- confirm.
            B = F.transpose(BT)
            BxBT = F.zeros([self.nbits, self.nbits])
            for i in range(self.nbits):
                for j in range(self.nbits):
                    BxBTij = F.multiply(B[i], B[j])
                    BxBTij = F.sum(BxBTij)
                    if i == j:
                        # EPS on the diagonal before the matrix is inverted.
                        BxBTij += EPS
                    BxBT[i, j] = BxBTij
            BxBT_inv = F.Custom(BxBT.expand_dims(0), op_type='matrix_inverse')
            BxBT_inv = BxBT_inv[0]
            # calculate BxX
            BxX = F.zeros([self.nbits])
            for i in range(self.nbits):
                BxXi0 = F.multiply(B[i], reshape_x)
                BxXi0 = F.sum(BxXi0)
                BxX[i] = BxXi0
            BxX = F.reshape(BxX, [self.nbits, 1])
            new_basis = F.dot(BxBT_inv, BxX)
            # create moving averages op
            basis = MOVING_AVERAGES_FACTOR * basis + new_basis * (1 - MOVING_AVERAGES_FACTOR)
            self.basis.set_data(basis)
    x_clip = F.minimum(x, levels[self.num_levels - 1])  # gradient clip
    # Forward value is y; x_clip and its negated, gradient-stopped copy
    # cancel, leaving only the gradient of x_clip.
    y = x_clip + F.stop_gradient(-x_clip) + F.stop_gradient(y)  # gradient: y=clip(x)
    return y
def hybrid_forward(self, F, box_preds, gt_boxes, obj_t, centers_t,
                   scales_t, weights_t, clas_t):
    """Merge prefetched (fixed) targets with dynamically computed ones.

    Wherever the prefetched objectness is positive the prefetched targets
    win; everywhere else the dynamic targets from ``self._dynamic_target``
    are used.  Optional label smoothing is applied to the class targets.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    box_preds : mxnet.nd.NDArray
        Predicted bounding boxes.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth bounding boxes.
    obj_t : mxnet.nd.NDArray
        Prefetched objectness targets.
    centers_t : mxnet.nd.NDArray
        Prefetched regression target for center x and y.
    scales_t : mxnet.nd.NDArray
        Prefetched regression target for scale x and y.
    weights_t : mxnet.nd.NDArray
        Prefetched element-wise gradient weights for center_targets and
        scale_targets.
    clas_t : mxnet.nd.NDArray
        Prefetched one-hot vector for classification.

    Returns
    -------
    list of mxnet.nd.NDArray
        ``[objectness, center_targets, scale_targets, weights,
        class_targets, class_mask]``, each wrapped in ``stop_gradient``.
        objectness: 0 for negative, 1 for positive, -1 for ignore.
    """
    with autograd.pause():
        dynamic_t = self._dynamic_target(box_preds, gt_boxes)
        # Pair every dynamic target (index 0) with its fixed one (index 1).
        obj, centers, scales, weights, clas = zip(
            dynamic_t, [obj_t, centers_t, scales_t, weights_t, clas_t])
        mask = obj[1] > 0
        objectness = F.where(mask, obj[1], obj[0])
        mask2 = mask.tile(reps=(2,))
        center_targets = F.where(mask2, centers[1], centers[0])
        scale_targets = F.where(mask2, scales[1], scales[0])
        weights = F.where(mask2, weights[1], weights[0])
        mask3 = mask.tile(reps=(self._num_class,))
        class_targets = F.where(mask3, clas[1], clas[0])
        if self._label_smooth:
            smooth_weight = 1. / self._num_class
            # Classify entries BEFORE modifying them.  The previous code
            # subtracted from the positives first and then overwrote every
            # entry that was not "< -0.5" -- positives included -- with
            # smooth_weight, which erased the positive labels.
            positive = class_targets > 0.5
            ignored = class_targets < -0.5
            smoothed = F.where(
                positive,
                class_targets - smooth_weight,
                F.ones_like(class_targets) * smooth_weight)
            # Ignored entries (-1) keep their sentinel value.
            class_targets = F.where(ignored, class_targets, smoothed)
        class_mask = mask.tile(
            reps=(self._num_class,)) * (class_targets >= 0)
        merged = [objectness, center_targets, scale_targets, weights,
                  class_targets, class_mask]
        return [F.stop_gradient(x) for x in merged]
def hybrid_forward(self, F, features, labels):
    """Triplet-style margin loss over every (i, j, k) triple of the batch.

    Adapted from triplet loss: the distance between two samples of the same
    class should be smaller, by a margin, than the distance to a sample of
    a different class.  This variant enumerates all sample pairs of the
    batch rather than mining triplets.

    A triple (i, j, k) contributes ``relu(d_ij - d_jk + margin)`` when i and
    j share a label (i != j) and j and k do not; the result is the mean over
    the contributing triples.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
    features : tensor
        One feature vector per sample; squared Euclidean distances are
        taken over the last axis.
    labels : tensor
        One label per sample.

    Returns
    -------
    tensor
        Scalar loss.  It is 0 when the batch contains no valid triple
        (previously this case produced NaN through 0 / 0).
    """
    with autograd.pause():
        same = labels.expand_dims(axis=1) == labels.expand_dims(axis=0)
        # Remove the diagonal: a sample is not a positive pair with itself.
        same = same - F.diag(F.diag(same))
        different = labels.expand_dims(axis=1) != labels.expand_dims(axis=0)
        # pair_sign[i, j]: +1 same class, -1 different class, 0 when i == j.
        pair_sign = same - different
        # w[i, j, k] is 1 only when (i, j) is +1 and (j, k) is -1, else 0.
        w = (pair_sign.expand_dims(axis=2)
             - pair_sign.expand_dims(axis=0) - 1).relu()
        w = w.astype(np.float32)

    difference = features.expand_dims(axis=1) - features.expand_dims(axis=0)
    distance = (difference ** 2).sum(axis=-1)
    # loss_ijk = d_ij - d_jk + margin, clipped at zero
    loss = (distance.expand_dims(axis=2)
            - distance.expand_dims(axis=0) + self._margin).relu()
    loss = w * loss
    # w holds only 0/1, so its sum is a count; clamping it to >= 1 changes
    # nothing unless there are no valid triples at all.
    return loss.sum() / F.maximum(w.sum(), 1)
def hybrid_forward(self, F, roi, gt_box):
    """Sample a fixed quota of ROIs from proposals plus ground truths.

    Only batch_size=1 is supported.

    Returns
    -------
    tuple
        ``(new_roi, new_samples, new_matches)``, each with a leading batch
        axis of 1 and at most ``self._num_sample`` entries.
    """
    with autograd.pause():
        # concat rpn roi with ground truths
        all_roi = F.concat(
            roi.squeeze(axis=0), gt_box.squeeze(axis=0), dim=0)
        # IoU between the candidate boxes and the ground truths, with the
        # first two axes swapped afterwards.
        ious = F.contrib.box_iou(all_roi, gt_box, format='corner')
        ious = ious.transpose((1, 0, 2))
        matches = self._matcher(ious)
        samples = F.Custom(
            matches, ious,
            op_type='quota_sampler',
            num_sample=self._num_sample,
            pos_thresh=self._pos_iou_thresh,
            neg_thresh_high=self._neg_iou_thresh_high,
            neg_thresh_low=self._neg_iou_thresh_low,
            pos_ratio=self._pos_ratio)
        # remove batch axis
        samples = samples.squeeze(axis=0)
        matches = matches.squeeze(axis=0)
        # Push entries with sample value 0 to the end of the descending
        # sort, then keep the first num_sample indices.
        ranking_key = F.where(
            samples == 0, F.ones_like(samples) * -999, samples)
        order = F.argsort(ranking_key, is_ascend=False)
        indices = order.slice_axis(axis=0, begin=0, end=self._num_sample)
        new_roi = all_roi.take(indices).expand_dims(0)
        new_samples = samples.take(indices).expand_dims(0)
        new_matches = matches.take(indices).expand_dims(0)
    return new_roi, new_samples, new_matches
def forward(self, bbox, anchor, width, height):
    """Build RPN training targets for one image.

    Only batch_size=1 is supported.  Note that part of the work is done in
    NumPy (the anchor mask is pulled to host memory).

    Returns
    -------
    tuple
        ``(cls_target, box_target, box_mask)``.
    """
    F = mx.nd
    with autograd.pause():
        # anchor with shape (N, 4)
        a_xmin, a_ymin, a_xmax, a_ymax = self._bbox_split(anchor)
        border = self._allowed_border
        inside = ((a_xmin >= -border) *
                  (a_ymin >= -border) *
                  (a_xmax <= (width + border)) *
                  (a_ymax <= (height + border)))
        # Indices of anchors that leave the image by more than the border.
        outside = inside <= 0
        imask = mx.nd.array(
            np.where(outside.asnumpy() > 0)[0], ctx=anchor.context)

        # IoU between the anchors and the ground-truth boxes.
        ious = F.contrib.box_iou(anchor, bbox, format='corner')
        ious = ious.transpose((1, 0, 2))
        # Out-of-image anchors get IoU -1.
        ious[:, imask, :] = -1

        matches = self._matcher(ious)
        samples = self._sampler(matches, ious)
        samples = samples.as_in_context(anchor.context)

        # training targets for RPN
        cls_target, _ = self._cls_encoder(samples)
        box_target, box_mask = self._box_encoder(
            samples, matches, anchor.expand_dims(axis=0), bbox)
    return cls_target, box_target, box_mask
def forward(self, bboxes, anchors, height, width):
    """Label anchors against ground-truth boxes.

    Returns
    -------
    tuple
        ``(cls_label, targets[0], masks[0])`` -- the class labels from the
        class encoder and the first entry of the box targets and masks.
    """
    with autograd.pause():
        ious = mx.nd.contrib.box_iou(anchors, bboxes)

        # Flag anchors that cross the image boundary.
        x_min, y_min, x_max, y_max = self._spliter(anchors)
        out_of_bounds = ((x_min < 0) + (y_min < 0) +
                         (x_max >= width) + (y_max >= height))
        # One copy of the flag per ground-truth box, then set the IoU of
        # every flagged anchor to -1.
        out_of_bounds = nd.repeat(
            out_of_bounds, repeats=bboxes.shape[0], axis=-1)
        ious = nd.where(out_of_bounds > 0, nd.ones_like(ious) * -1, ious)

        # Sample the anchors.
        samples, matches = self._sampler(ious)

        # Encode the labels; the box encoder is fed a leading axis of 1.
        cls_label, _ = self._cls_encoder(samples)
        targets, masks = self._bbox_encoder(
            samples.expand_dims(axis=0),
            matches.expand_dims(axis=0),
            anchors.expand_dims(axis=0),
            bboxes.expand_dims(axis=0))
    return cls_label, targets[0], masks[0]
def sample(
    self, num_samples: Optional[int] = None, dtype=np.float32
) -> Tensor:
    r"""
    Draw samples from the distribution.

    The samples come from ``sample_rep`` and are passed through
    ``BlockGrad`` before being returned.

    Parameters
    ----------
    num_samples
        Number of samples to be drawn.  If given, the first dimension of
        the output will be ``num_samples``.
    dtype
        Data-type of the samples.

    Returns
    -------
    Tensor
        A tensor containing samples. This has shape
        `(*batch_shape, *eval_shape)` if `num_samples = None`
        and `(num_samples, *batch_shape, *eval_shape)` otherwise.
    """
    with autograd.pause():
        drawn = self.sample_rep(num_samples=num_samples, dtype=dtype)
        blocked = getF(drawn).BlockGrad(drawn)
    return blocked
def hybrid_forward(self, F, anchor, score, bbox_pred, img):
    """Generate proposals.

    Limited to batch-size=1 in the current implementation.

    Returns
    -------
    tuple
        ``(rpn_scores, rpn_bbox)`` holding the first ``post_nms`` rows of
        the NMS output, split into the score column and the box columns.
    """
    training = autograd.is_training()
    pre_nms = self._train_pre_nms if training else self._test_pre_nms
    post_nms = self._train_post_nms if training else self._test_post_nms

    with autograd.pause():
        # Decode the predicted offsets into boxes, then clip to the image.
        roi = self._box_decoder(bbox_pred, self._box_to_center(anchor))
        roi = self._clipper(roi, img)

        # Boxes below the min_size constraint are marked invalid; their
        # score and all four coordinates are replaced by -1 below.
        xmin, ymin, xmax, ymax = roi.split(axis=-1, num_outputs=4)
        width = xmax - xmin
        height = ymax - ymin
        # TODO:(zhreshold), there's im_ratio to handle here, but it requires
        # add' info, and we don't expect big difference
        invalid = (width < self._min_size) + (height < self._min_size)

        # avoid invalid anchors suppress anchors with 0 confidence
        score = F.where(invalid, F.ones_like(invalid) * -1, score)
        invalid = F.repeat(invalid, axis=-1, repeats=4)
        roi = F.where(invalid, F.ones_like(invalid) * -1, roi)

        # Non-maximum suppression over [score, x1, y1, x2, y2] rows.
        candidates = F.concat(score, roi, dim=-1)
        suppressed = F.contrib.box_nms(
            candidates,
            overlap_thresh=self._nms_thresh,
            topk=pre_nms,
            coord_start=1,
            score_index=0,
            id_index=-1,
            force_suppress=True)

        # slice post_nms number of boxes
        kept = F.slice_axis(suppressed, axis=1, begin=0, end=post_nms)
        rpn_scores = F.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_bbox = F.slice_axis(kept, axis=-1, begin=1, end=None)
    return rpn_scores, rpn_bbox
def hybrid_forward(self, F, x, *args, **kw):
    """
    Run the pre- and post-processing around one execution of the
    InferenceAlgorithm.

    :param F: the MXNet computation mode
    :type F: mxnet.symbol or mxnet.ndarray
    :param x: a dummy variable to enable the execution of this Gluon block
    :type x: MXNet NDArray or MXNet Symbol
    :param *args: all the positional arguments, which correspond to the data
        provided to the InferenceAlgorithm.
    :type *args: list of MXNet NDArray or MXNet Symbol
    :param **kw: all the keyword arguments, which correspond to the
        parameters that may require gradients.
    :type kw: {str(UUID): MXNet NDArray or MXNet Symbol}
    :returns: the outcome of the InferenceAlgorithm, as determined by the
        inference algorithm.
    :rtypes: {str: MXNet NDArray or MXNet Symbol}
    """
    # Tied variables take the value of the variable they are tied to.
    for tied_uuid, source_uuid in self._var_ties.items():
        kw[tied_uuid] = kw[source_uuid]

    # Positional data is matched to the declared data names in order.
    variables = add_sample_dimension_to_arrays(
        F, dict(zip(self._data_def, args)))

    for uuid, transformation in self._var_trans.items():
        kw[uuid] = transformation.transform(kw[uuid], F=F)

    add_sample_dimension_to_arrays(F, kw, out=variables)
    add_sample_dimension_to_arrays(F, self._constants, out=variables)

    obj = self._infr_method.compute(F=F, variables=variables)

    with autograd.pause():
        # An inference algorithm may directly set the value of a parameter
        # instead of computing its gradient; such requests are stored in
        # `variables` under keys carrying SET_PARAMETER_PREFIX.
        for key, request in variables.items():
            if not key.startswith(SET_PARAMETER_PREFIX):
                continue
            self._infr_params[request[0]] = request[1]
    return obj
def hybrid_forward(self, F, roi, samples, matches, gt_label, gt_box):
    """Encode classification and box-regression targets for sampled ROIs.

    Components can handle batch images.

    Parameters
    ----------
    roi: (B, N, 4), input proposals
    samples: (B, N), value +1: positive / -1: negative.
    matches: (B, N), value [0, M), index to gt_label and gt_box.
    gt_label: (B, M), value [0, num_class), excluding background class.
    gt_box: (B, M, 4), input ground truth box corner coordinates.

    Returns
    -------
    cls_target: output of ``self._cls_encoder``.
    box_target, box_mask: outputs of ``self._box_encoder``, returned without
        any reshaping.
    """
    with autograd.pause():
        cls_target = self._cls_encoder(samples, matches, gt_label)
        box_target, box_mask = self._box_encoder(
            samples, matches, roi, gt_label, gt_box)
    return cls_target, box_target, box_mask
def _spectral_norm(self, weight: Tensor, u: Tensor) -> Tensor:
    """
    Rescale ``weight`` by a clamped estimate of its spectral norm.

    Adapted from https://github.com/apache/incubator-
    mxnet/blob/master/example/gluon/sn_gan/model.py.

    The largest singular value is estimated with ``self._num_power_iter``
    rounds of power iteration starting from ``u``; the refined vector is
    written back to ``self._u``.

    Parameters
    ----------
    weight
        Weight tensor; it is flattened to ``(weight.shape[0], -1)`` for the
        iteration.
    u
        Current estimate of the leading left singular vector.

    Returns
    -------
    Tensor
        ``weight / max(1, sigma / self._coeff)``.
    """
    w_mat = nd.reshape(weight, [weight.shape[0], -1])

    _u = u
    _v = None
    for _ in range(self._num_power_iter):
        _v = nd.L2Normalization(nd.dot(_u, w_mat))
        _u = nd.L2Normalization(nd.dot(_v, w_mat.T))

    sigma = nd.sum(nd.dot(_u, w_mat) * _v)

    # This differs from standard spectral normalization: the divisor is
    # clamped from below by 1.  Because of that clamp sigma can never be
    # zero, so no zero guard is needed (the previous `sigma == 0.0` check
    # was unreachable and forced a blocking device sync).
    sigma = nd.maximum(nd.ones(1, ctx=self._ctx), sigma / self._coeff)

    with autograd.pause():
        self._u.set_data(_u)

    return weight / sigma
def forward(self, roi, samples, matches, gt_label, gt_box):
    """Encode RCNN training targets and move the class axis behind N.

    Components can handle batch images.

    Parameters
    ----------
    roi: (B, N, 4), input proposals
    samples: (B, N), value +1: positive / -1: negative.
    matches: (B, N), value [0, M), index to gt_label and gt_box.
    gt_label: (B, M), value [0, num_class), excluding background class.
    gt_box: (B, M, 4), input ground truth box corner coordinates.

    Returns
    -------
    cls_target: (B, N), value [0, num_class + 1), including background.
    box_target: (B, N, C, 4), only foreground class has nonzero target.
    box_weight: (B, N, C, 4), only foreground class has nonzero weight.
    """
    # Axis order that turns (C, B, N, 4) into (B, N, C, 4).
    to_prediction_layout = (1, 2, 0, 3)
    with autograd.pause():
        # positives become gt_label class + 1, negatives 0, ignored -1
        cls_target = self._cls_encoder(samples, matches, gt_label)
        # negative and ignored boxes are set to zero by the encoder
        encoded_box, encoded_mask = self._box_encoder(
            samples, matches, roi, gt_label, gt_box)
        box_target = encoded_box.transpose(to_prediction_layout)
        box_mask = encoded_mask.transpose(to_prediction_layout)
    return cls_target, box_target, box_mask
def __call__(self, module):
    """Clip ``module.rho`` in place to ``[self.clip_min, self.clip_max]``.

    Modules without a ``rho`` attribute are left untouched.
    """
    if not hasattr(module, 'rho'):
        return
    with autograd.pause():
        clipped = module.rho.data().clip(self.clip_min, self.clip_max)
        # Write through the existing array so the parameter keeps its buffer.
        module.rho.data()[:] = clipped
def hybrid_forward(self, F, box_preds, gt_boxes):
    """Compute dynamic targets from the current predictions.

    Only the objectness carries information: it is -1 wherever the best
    IoU with any ground-truth box exceeds ``self._ignore_iou_thresh`` and 0
    elsewhere.  The other targets are constant placeholders.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    box_preds : mxnet.nd.NDArray
        Predicted bounding boxes.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth bounding boxes.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        objectness: 0 for negative, -1 for ignore.
        center_targets: zeros (two columns).
        scale_targets: zeros (two columns).
        weights: zeros (two columns).
        class_targets: filled with -1 (``self._num_class`` columns).
    """
    with autograd.pause():
        box_preds = box_preds.reshape((0, -1, 4))
        # Templates that only lend their shape to the constant targets.
        one_col = F.zeros_like(
            box_preds.slice_axis(axis=-1, begin=0, end=1))
        center_t = F.zeros_like(
            box_preds.slice_axis(axis=-1, begin=0, end=2))
        scale_t = F.zeros_like(
            box_preds.slice_axis(axis=-1, begin=0, end=2))
        weight_t = F.zeros_like(
            box_preds.slice_axis(axis=-1, begin=0, end=2))
        class_t = F.ones_like(one_col.tile(reps=self._num_class)) * -1

        batch_ious = self._batch_iou(box_preds, gt_boxes)  # (B, N, M)
        best_iou = batch_ious.max(axis=-1, keepdims=True)  # (B, N, 1)
        # use -1 for ignored
        objness_t = (best_iou > self._ignore_iou_thresh) * -1
    return objness_t, center_t, scale_t, weight_t, class_t
def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
    """Batch normalization with statistics reduced through ``AllReduce``.

    Outside training this is a plain ``F.BatchNorm`` using the running
    statistics.  In training the per-device sum and sum of squares are
    all-reduced, the running statistics are updated through
    ``self.updater``, and the input is normalized with
    ``F.DecoupleBatchNorm``.
    """
    if not autograd.is_training():
        return F.BatchNorm(x, gamma, beta, running_mean, running_var,
                           name='fwd', **self._kwargs)

    local_sum, local_sqsum = F.SumSquare(x)
    # Element count per channel over all devices (axes 0, 2 and 3 of x).
    count = self.ndevices * x.shape[0] * x.shape[2] * x.shape[3]
    reducer = AllReduce(self._prefix)
    total_sum, total_sqsum = reducer(local_sum, local_sqsum)

    # Mean and standard deviation; the biased variance is floored at eps.
    mean = total_sum / count
    sum_sq_dev = total_sqsum - total_sum * total_sum / count
    biased_var = sum_sq_dev / count
    std = F.sqrt(F.maximum(biased_var, self.eps))

    # The running statistics use the unbiased variance.
    with autograd.pause():
        unbiased_var = sum_sq_dev / (count - 1)
        self.updater(self.running_mean, self.running_var, mean,
                     unbiased_var, self.momentum, x.context)

    return F.DecoupleBatchNorm(x, gamma, beta, mean, std)
def train_the(self, batch_xs, batch_ys):
    """Run one training step over the per-device batch slices.

    For every device slice the network is evaluated, three losses are
    summed, and after all slices are processed the gradients are
    back-propagated and the trainer is stepped.  The three loss terms of
    the LAST slice are handed to ``record_to_tensorboard_and_save``.
    """
    device_losses = []
    with autograd.record():
        for gpu_index, (batch_x, batch_y) in enumerate(zip(batch_xs, batch_ys)):
            prediction = self.net(batch_x)
            # Targets and weights are built without recording.
            with autograd.pause():
                label, mask = self.loss_mask(batch_y, gpu_index)
                ones = nd.ones_like(mask)
                score_weight = nd.where(mask > 0,
                                        ones * 10.0,
                                        nd.ones_like(mask) * 0.1,
                                        ctx=ctx[gpu_index])
            score_loss = self.LG_loss(prediction[0], label[0], score_weight * 0.1)
            box_loss = self.L2_loss(prediction[1], label[1], mask * 1.0)
            # weight 0.1 (author's note: 1.0 after one day of training)
            class_loss = self.CE_loss(prediction[2], label[2], mask * 0.1)
            device_losses.append(score_loss + box_loss + class_loss)

    for device_loss in device_losses:
        device_loss.backward()
    self.trainer.step(batch_size)
    self.record_to_tensorboard_and_save([score_loss, box_loss, class_loss])
def compute(self, F, variables):
    """Evaluate the Gaussian-process log-likelihood and cache the posterior.

    The kernel matrix plus noise on the diagonal is Cholesky-factorized,
    the (mean-subtracted) targets are whitened with that factor, and the
    log-likelihood is assembled from the log-determinant and the squared
    whitened targets.  ``X``, ``L`` and ``L^-1 Y`` of the first sample are
    stored on the posterior through ``set_parameter``.

    :param F: the MXNet computation mode
    :param variables: mapping from variable to its value
    :returns: the log-likelihood
    """
    model = self.model
    X = variables[model.X]
    Y = variables[model.Y]
    noise_var = variables[model.noise_var]
    # Sizes are read before the sample dimension is added.
    D = Y.shape[-1]
    N = X.shape[-2]
    kern = model.kernel
    kern_params = kern.fetch_parameters(variables)

    X, Y, noise_var, kern_params = arrays_as_samples(
        F, [X, Y, noise_var, kern_params])

    noise_diag = (F.expand_dims(F.eye(N, dtype=X.dtype), axis=0) *
                  F.expand_dims(noise_var, axis=-2))
    K = kern.K(F, X, **kern_params) + noise_diag
    L = F.linalg.potrf(K)

    if model.mean_func is not None:
        Y = Y - model.mean_func(F, X)

    LinvY = F.linalg.trsm(L, Y)
    logdet_l = F.linalg.sumlogdiag(F.abs(L))
    per_entry = F.square(LinvY) + np.log(2. * np.pi)
    tmp = F.sum(F.reshape(per_entry, shape=(Y.shape[0], -1)), axis=-1)
    logL = - logdet_l * D - tmp/2

    with autograd.pause():
        posterior = self.posterior
        self.set_parameter(variables, posterior.X, X[0])
        self.set_parameter(variables, posterior.L, L[0])
        self.set_parameter(variables, posterior.LinvY, LinvY[0])
    return logL
def forward(self, rcnn_cls_pred, rcnn_bbox_pred, rcnn_cls_gt, rcnn_bbox_gt):
    """Compute the RCNN classification and box-regression losses.

    Returns
    -------
    tuple
        ``(cls_log_loss, bbox_smooth_l1_loss)``, both divided by
        ``self._roi_batch_size``.
    """
    # The regression weights are constants and are built without recording.
    with autograd.pause():
        ctx = rcnn_cls_pred.context
        roi_num = rcnn_cls_pred.shape[0]
        roi_idx = nd.arange(roi_num, ctx=ctx).reshape(-1, 1)
        # 1 for ROIs whose ground-truth class is > 0, else 0.
        fg_bbox_mask = (rcnn_cls_gt > 0).reshape(0, 1, 1)
        bbox_weights = nd.zeros_like(rcnn_bbox_gt).reshape(0, -1, 4)
        # Only the 4 slots of each ROI's ground-truth class get a weight.
        per_roi_weight = self._bbox_weights.data(ctx).broadcast_to(
            (roi_num, 1, 4))
        bbox_weights[roi_idx, rcnn_cls_gt[:], :] = \
            per_roi_weight * fg_bbox_mask
        bbox_weights = bbox_weights.reshape(0, -1)

    normalizer = self._roi_batch_size

    # rcnn_cls_pred.shape (roi_num, num_classes); clip before the log.
    log_prob = nd.log(nd.clip(rcnn_cls_pred, 1e-14, 1))
    cls_log_loss = -nd.sum(
        log_prob[roi_idx, rcnn_cls_gt]) / normalizer.data(ctx)

    # rcnn_bbox_pred.shape (roi_num, num_classes*4)
    smooth_l1 = nd.smooth_l1(rcnn_bbox_pred - rcnn_bbox_gt, scalar=1.0)
    bbox_smooth_l1_loss = nd.sum(
        smooth_l1 * bbox_weights) / normalizer.data(ctx)

    return cls_log_loss, bbox_smooth_l1_loss
def forward(self, roi, samples, matches, gt_label, gt_box):
    """Encode RCNN training targets and add a singleton axis at position 2.

    Components can handle batch images.

    Parameters
    ----------
    roi: (B, N, 4), input proposals
    samples: (B, N), value +1: positive / -1: negative.
    matches: (B, N), value [0, M), index to gt_label and gt_box.
    gt_label: (B, M), value [0, num_class), excluding background class.
    gt_box: (B, M, 4), input ground truth box corner coordinates.

    Returns
    -------
    cls_target: output of ``self._cls_encoder``.
    box_target, box_mask: outputs of ``self._box_encoder`` with a new axis
        inserted at position 2 (presumably (B, N, 4) -> (B, N, 1, 4);
        TODO confirm against the box encoder).
    """
    with autograd.pause():
        cls_target = self._cls_encoder(samples, matches, gt_label)
        # Note: unlike the class encoder, the box encoder is not given
        # gt_label here.
        encoded_box, encoded_mask = self._box_encoder(
            samples, matches, roi, gt_box)
        box_target = encoded_box.expand_dims(axis=2)
        box_mask = encoded_mask.expand_dims(axis=2)
    return cls_target, box_target, box_mask
def batch_forward(self, batch_data, validation=False):
    """Run the network on one batch split across ``self.args.ctx``.

    Parameters
    ----------
    batch_data : dict
        Maps a field name to the batch for that field.  ``'images'`` and
        ``'points'`` are fed to the network; ``'instances'`` (if present) is
        reshaped with ``(-3, -2)`` after splitting.
    validation : bool
        Selects the validation metrics and disables gradient recording.

    Returns
    -------
    tuple
        ``(losses, losses_logging, splitted_batch, outputs)`` with one loss
        and one output per device slice.
    """
    splitted_batch = {}
    for key, value in batch_data.items():
        splitted_batch[key] = gluon.utils.split_and_load(
            value, ctx_list=self.args.ctx, even_split=False)
    if 'instances' in splitted_batch:
        splitted_batch['instances'] = [
            masks.reshape(shape=(-3, -2))
            for masks in splitted_batch['instances']
        ]

    if validation:
        metrics = self.val_metrics
        scope = autograd.pause(False)
    else:
        metrics = self.train_metrics
        scope = autograd.record(True)

    losses_logging = defaultdict(list)
    with scope:
        outputs = [
            self.net(image, points)
            for image, points in zip(splitted_batch['images'],
                                     splitted_batch['points'])
        ]

        losses = []
        for ictx, ctx_output in enumerate(outputs):
            # Each add_loss call receives its inputs lazily via a lambda.
            loss = 0.0
            loss = self.add_loss(
                'instance_loss', loss, losses_logging, validation,
                lambda: (ctx_output.instances,
                         splitted_batch['instances'][ictx]))
            loss = self.add_loss(
                'segmentation_loss', loss, losses_logging, validation,
                lambda: (ctx_output.semantic,
                         splitted_batch['semantic'][ictx]))
            loss = self.add_loss(
                'proposals_loss', loss, losses_logging, validation,
                lambda: (ctx_output.instances, ctx_output.proposals,
                         splitted_batch['instances'][ictx]))

            # Metric updates are never recorded.
            with autograd.pause():
                for metric in metrics:
                    metric.update(
                        *(getattr(ctx_output, name)
                          for name in metric.pred_outputs),
                        *(splitted_batch[name][ictx]
                          for name in metric.gt_outputs))

            losses.append(loss)

    return losses, losses_logging, splitted_batch, outputs
def hybrid_forward(self, F, x, num=0, fix_conv=False):
    """Compute the normalized embedding and the concatenated head outputs.

    ``self.fix_layers`` selects how many of the feature stages
    (``feats1`` .. ``feats3``, and ``fc1`` when ``self.fix_fc`` is set) are
    evaluated under ``ag.pause()``.  Heads ``fc2`` .. ``fc{num + 2}`` are
    then applied to the embedding and concatenated along axis 1.  When
    ``self.fw`` is set, every head except the last is evaluated under
    ``ag.pause()``.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
    x : tensor
        Network input.
    num : int
        Index of the last head to evaluate (heads 0 .. num are used).
    fix_conv : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(out, output)``: the L2-normalized embedding and the concatenated
        head outputs.
    """
    if self.fix_layers == 0:
        out = F.L2Normalization(
            self.fc1(self.feats3(self.feats2(self.feats1(x)))))
    elif self.fix_layers == 1:
        with ag.pause():
            x = self.feats1(x)
        out = F.L2Normalization(self.fc1(self.feats3(self.feats2(x))))
    elif self.fix_layers == 2:
        with ag.pause():
            x = self.feats2(self.feats1(x))
        out = F.L2Normalization(self.fc1(self.feats3(x)))
    elif self.fix_layers == 3:
        if self.fix_fc:
            with ag.pause():
                x = self.fc1(self.feats3(self.feats2(self.feats1(x))))
            out = F.L2Normalization(x)
        else:
            with ag.pause():
                x = self.feats3(self.feats2(self.feats1(x)))
            out = F.L2Normalization(self.fc1(x))

    for i in range(num + 1):
        # Heads are attributes fc2, fc3, ...; look them up with getattr
        # rather than eval on a built string.
        fc = getattr(self, 'fc' + str(i + 2))
        if self.fw and i < num:
            # Earlier heads are evaluated without recording.
            with ag.pause():
                output = fc(out) if i == 0 else F.concat(output, fc(out), dim=1)
        else:
            # F.concat (not mx.nd.concat) so symbol mode works as well.
            output = fc(out) if i == 0 else F.concat(output, fc(out), dim=1)
    return out, output
def add_batchid(self, F, bbox):
    """Prefix every box with the index of the batch item it belongs to.

    The number of boxes per batch item is ``self._num_sample`` in training
    mode and ``self._rpn_test_post_nms`` otherwise.

    Returns
    -------
    tensor
        2-D array with rows ``[batch_id, c0, c1, c2, c3]``, gradient stopped.
    """
    if autograd.is_training():
        boxes_per_item = self._num_sample
    else:
        boxes_per_item = self._rpn_test_post_nms

    with autograd.pause():
        # 0,0,...,1,1,... -- each batch id repeated once per box.
        batch_ids = F.arange(0, self._max_batch, repeat=boxes_per_item)
        # remove batch dim because ROIPooling require 2d input
        columns = [batch_ids.reshape((-1, 1)), bbox.reshape((-1, 4))]
        roi = F.stop_gradient(F.concat(*columns, dim=-1))
        return roi
def hybrid_forward(self, F, box_preds, gt_boxes, obj_t, centers_t,
                   scales_t, weights_t, clas_t):
    """Merge prefetched (fixed) targets with dynamically computed ones.

    Wherever the prefetched objectness is positive the prefetched targets
    are used; elsewhere the dynamic targets from ``self._dynamic_target``.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
    box_preds, gt_boxes : tensor
        Passed to ``self._dynamic_target``.
    obj_t, centers_t, scales_t, weights_t, clas_t : tensor
        Prefetched objectness, centre, scale, weight and class targets.

    Returns
    -------
    list
        ``[objectness, center_targets, scale_targets, weights,
        class_targets, class_mask]``, each wrapped in ``stop_gradient``.
    """
    with autograd.pause():
        dynamic_t = self._dynamic_target(box_preds, gt_boxes)
        # Pair every dynamic target (index 0) with its fixed one (index 1).
        obj, centers, scales, weights, clas = zip(
            dynamic_t, [obj_t, centers_t, scales_t, weights_t, clas_t])

        # Author's notes, translated; the shapes quoted are the author's
        # and have not been re-verified here.
        #
        # mask = obj[1] > 0, shape [batch, h*w*9, 1]: 1 where the grid cell
        #   holds an object, 0 otherwise.  obj[1] <= 0 means the cell has no
        #   object: it is either ignored or background (an ignored box does
        #   not have its centre in this cell).
        # objectness:
        #    1 -> positive sample, an object really is in this box;
        #    0 -> negative sample (ious_max < self._ignore_iou_thresh): the
        #         location must not be ignored and enters the negative loss;
        #   -1 -> ignored (ious_max > self._ignore_iou_thresh): the cell has
        #         no positive sample yet is detected as background, so it is
        #         left out of the loss.  For a positive cell with
        #         ious_max above the threshold, ignoring or not makes no
        #         difference because it is already detected correctly.
        # mask2, shape [batch, 3*(h*w*3), 2]: where the cell has an object
        #   take that object's centre / scale / weight, otherwise the
        #   dynamic value.  weight_t balances the (x, y) and (w, h) terms
        #   in the loss.
        # mask3, shape [batch, 3*(h*w*3), 20]: same idea for the class
        #   target -- the one-hot ground truth where there is an object,
        #   otherwise the dynamic value (-1).
        # Of the returned values, objectness and class_targets are the ones
        #   that can contain -1.
        mask = obj[1] > 0
        objectness = F.where(mask, obj[1], obj[0])
        mask2 = mask.tile(reps=(2,))
        center_targets = F.where(mask2, centers[1], centers[0])
        scale_targets = F.where(mask2, scales[1], scales[0])
        weights = F.where(mask2, weights[1], weights[0])
        mask3 = mask.tile(reps=(self._num_class,))
        # one-hot encoding where the fixed target applies
        class_targets = F.where(mask3, clas[1], clas[0])
        class_mask = mask.tile(
            reps=(self._num_class,)) * (class_targets >= 0)
        merged = [objectness, center_targets, scale_targets, weights,
                  class_targets, class_mask]
        return [F.stop_gradient(x) for x in merged]
def sample(self, num_samples: Optional[int] = None,
           dtype=np.float32) -> Tensor:
    """Sample from the base distribution and push the draw through every
    transform in ``self.transforms``, in order.

    Parameters
    ----------
    num_samples
        Forwarded to the base distribution's ``sample``.
    dtype
        Forwarded to the base distribution's ``sample``.

    Returns
    -------
    Tensor
        The transformed samples.
    """
    with autograd.pause():
        draw = self.base_distribution.sample(
            num_samples=num_samples, dtype=dtype)
        for transform in self.transforms:
            draw = transform.f(draw)
        return draw
def compute(self, F, variables):
    """Evaluate the sparse-GP objective and cache posterior quantities.

    Uses the inducing inputs ``Z`` to build ``Kuu`` (optionally with jitter
    on the diagonal), ``Kuf`` and the diagonal of ``Kff``, factorizes
    ``Kuu`` and ``A = I + L^-1 Kuf Kuf^T L^-T / noise_var``, and sums the
    five terms of the bound.  ``wv``, ``L`` and ``LA`` of the first sample
    are stored on ``self.graphs[1]`` through ``set_parameter``.

    :param F: the MXNet computation mode
    :param variables: mapping from variable to its value
    :returns: the value of the objective
    """
    model = self.model
    X = variables[model.X]
    Y = variables[model.Y]
    Z = variables[model.inducing_inputs]
    noise_var = variables[model.noise_var]
    # Sizes are read before the sample dimension is added.
    D = Y.shape[-1]
    M = Z.shape[-2]
    kern = model.kernel
    kern_params = kern.fetch_parameters(variables)

    X, Y, Z, noise_var, kern_params = arrays_as_samples(
        F, [X, Y, Z, noise_var, kern_params])

    noise_var_m = F.expand_dims(noise_var, axis=-2)

    Kuu = kern.K(F, Z, **kern_params)
    if self.jitter > 0.:
        Kuu = Kuu + F.expand_dims(F.eye(M, dtype=Z.dtype), axis=0) * \
            self.jitter
    Kuf = kern.K(F, Z, X, **kern_params)
    Kff_diag = kern.Kdiag(F, X, **kern_params)

    L = F.linalg.potrf(Kuu)
    LinvKuf = F.linalg.trsm(L, Kuf)

    A = F.expand_dims(F.eye(M, dtype=Z.dtype), axis=0) + \
        F.broadcast_div(F.linalg.syrk(LinvKuf), noise_var_m)
    LA = F.linalg.potrf(A)

    if model.mean_func is not None:
        Y = Y - model.mean_func(F, X)

    LAInvLinvKufY = F.linalg.trsm(LA, F.linalg.gemm2(LinvKuf, Y))

    # The five terms are combined in the same left-to-right order as a
    # running accumulation would produce.
    logdet_term = -D * F.linalg.sumlogdiag(LA)
    data_term = F.sum(F.sum(
        F.square(Y) / noise_var_m + np.log(2. * np.pi) +
        F.log(noise_var_m), axis=-1), axis=-1) / 2
    projected_term = F.sum(F.sum(
        F.square(LAInvLinvKufY) / (2 * F.square(noise_var_m)),
        axis=-1), axis=-1)
    kff_term = D * F.sum(Kff_diag / (2 * noise_var), axis=-1)
    kuf_term = D * F.sum(
        F.sum(F.square(LinvKuf) / (2. * noise_var_m), axis=-1), axis=-1)
    logL = logdet_term - data_term + projected_term - kff_term + kuf_term

    with autograd.pause():
        inner = F.linalg.trsm(LA, LAInvLinvKufY, transpose=True)
        wv = F.broadcast_div(
            F.linalg.trsm(L, inner, transpose=True), noise_var_m)
        posterior = self.graphs[1]
        self.set_parameter(variables, posterior.wv, wv[0])
        self.set_parameter(variables, posterior.L, L[0])
        self.set_parameter(variables, posterior.LA, LA[0])
    return logL
def hybrid_forward(self, F, x, num=0, fix_cnn=False):
    """Compute the normalized feature and the concatenated head outputs.

    With ``fix_cnn`` the early part of ``self.features`` is evaluated under
    ``ag.pause()``.  Heads ``fc2`` .. ``fc{num + 2}`` are then applied to
    the normalized feature and concatenated along axis 1; when ``self.fw``
    is set, every head except the last is evaluated under ``ag.pause()``.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
    x : tensor
        Network input.
    num : int
        Index of the last head to evaluate (heads 0 .. num are used).
    fix_cnn : bool
        Evaluate the early feature layers without recording.

    Returns
    -------
    tuple
        ``(feat, output)``: the L2-normalized feature and the concatenated
        head outputs.
    """
    if fix_cnn:
        # NOTE(review): the end of this pause scope was inferred (the
        # source layout had lost its indentation).  It is placed between
        # features[7][0] and features[7][1] because that is the only
        # boundary that explains why the stage is split -- confirm.
        with ag.pause():
            x = self.features[:7](x)
            x = self.features[7][0](x)
        x = self.features[7][1](x)
        x = self.features[8:](x)
    else:
        x = self.features(x)
    out = F.L2Normalization(x)
    feat = out

    for i in range(num + 1):
        # Heads are attributes fc2, fc3, ...; look them up with getattr
        # rather than eval on a built string.
        fc = getattr(self, 'fc' + str(i + 2))
        if self.fw and i < num:
            # Earlier heads are evaluated without recording.
            with ag.pause():
                output = fc(out) if i == 0 else F.concat(output, fc(out), dim=1)
        else:
            # F.concat (not mx.nd.concat) so symbol mode works as well.
            output = fc(out) if i == 0 else F.concat(output, fc(out), dim=1)
    return feat, output
def forward(self, roi):
    """Drop the padded tail of a proposal list.

    Scans the first coordinate of the ROIs of batch item 0 and cuts the
    list at the first entry equal to -1.  If no such entry exists within
    the first ``self._rpn_train_pre_nms`` ROIs, all of those ROIs are kept.

    Parameters
    ----------
    roi : NDArray
        3-D array of proposals; axis 1 enumerates the ROIs.

    Returns
    -------
    NDArray
        ``roi`` sliced along axis 1 to the valid prefix.
    """
    with autograd.pause():
        # Never look past the ROIs that actually exist.
        limit = min(self._rpn_train_pre_nms, roi.shape[1])
        # One device-to-host copy instead of one sync per element.
        first_coord = roi[0].asnumpy()[:limit, 0]
        # Default keeps everything scanned; previously the loop index was
        # reused, which dropped the last ROI when no -1 was present.
        end = limit
        for idx, value in enumerate(first_coord):
            if value == -1:
                end = idx
                break
        roi = roi.slice_axis(axis=1, begin=0, end=end)
    return roi
def add_batchid(self, F, bbox):
    """Prefix every box with the index of the batch item it belongs to.

    A ``(self._max_batch, self._max_roi)`` table of batch ids is built and
    cropped to the first two axes of ``bbox``.

    Returns
    -------
    tensor
        2-D array with rows ``[batch_id, c0, c1, c2, c3]``.
    """
    with autograd.pause():
        id_table = F.arange(0, self._max_batch, repeat=self._max_roi)
        id_table = id_table.reshape((-1, self._max_roi))
        # `bbox * 0` only serves as a shape reference for the crop.
        id_table = F.slice_like(id_table, bbox * 0, axes=(0, 1))
        columns = [id_table.reshape((-1, 1)), bbox.reshape((-1, 4))]
        roi = F.concat(*columns, dim=-1)
        return roi
def update(self, rpn_cls_label, pred_rpn_box_cls):
    """Append the RPN classification accuracy of one batch to ``self.preds``.

    Labels equal to -1 are excluded from both the hit count and the
    denominator.
    """
    with ag.pause():
        # Two rows of scores; the arg-max over them is the predicted class.
        predicted = pred_rpn_box_cls.reshape(2, -1).argmax(axis=0)
        labels = rpn_cls_label.reshape(-1)
        valid = (labels != -1).astype('f')
        hits = (predicted == labels).astype('f')
        accuracy = mx.nd.sum(valid * hits) / mx.nd.sum(valid)
        self.preds.append(accuracy.asscalar())
def hybrid_forward(self, F, box_preds, gt_boxes, obj_t, centers_t, scales_t, weights_t, clas_t):
    """Merge dynamically generated targets with the prefetched targets.

    Wherever the prefetched objectness is positive, the prefetched value of
    every target overrides the dynamic one.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    box_preds : mxnet.nd.NDArray
        Predicted bounding boxes.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth bounding boxes.
    obj_t : mxnet.nd.NDArray
        Prefetched Objectness targets.
    centers_t : mxnet.nd.NDArray
        Prefetched regression target for center x and y.
    scales_t : mxnet.nd.NDArray
        Prefetched regression target for scale x and y.
    weights_t : mxnet.nd.NDArray
        Prefetched element-wise gradient weights for center_targets and scale_targets.
    clas_t : mxnet.nd.NDArray
        Prefetched one-hot vector for classification.

    Returns
    -------
    list of mxnet.nd.NDArray
        objectness: 0 for negative, 1 for positive, -1 for ignore.
        center_targets: regression target for center x and y.
        scale_targets: regression target for scale x and y.
        weights: element-wise gradient weights for center_targets and scale_targets.
        class_targets: a one-hot vector for classification (smoothed when
        label smoothing is enabled).
        class_mask: non-zero where the prefetched objectness is positive and
        the class target is not an ignore value.
        All outputs are wrapped in ``stop_gradient``.

    """
    with autograd.pause():
        dynamic_t = self._dynamic_target(box_preds, gt_boxes)
        # use fixed target to override dynamic targets
        # each name below is a (dynamic, prefetched) pair
        obj, centers, scales, weights, clas = zip(
            dynamic_t, [obj_t, centers_t, scales_t, weights_t, clas_t])
        mask = obj[1] > 0
        objectness = F.where(mask, obj[1], obj[0])
        mask2 = mask.tile(reps=(2,))
        center_targets = F.where(mask2, centers[1], centers[0])
        scale_targets = F.where(mask2, scales[1], scales[0])
        weights = F.where(mask2, weights[1], weights[0])
        mask3 = mask.tile(reps=(self._num_class,))
        class_targets = F.where(mask3, clas[1], clas[0])
        if self._label_smooth:
            smooth_weight = 1. / self._num_class
            # Classify entries BEFORE modifying them. The previous
            # two-step version replaced every entry that was not an ignore
            # value by `smooth_weight`, which also overwrote the positives
            # that had just been lowered to `1 - smooth_weight`.
            is_positive = class_targets > 0.5
            is_ignore = class_targets < -0.5
            smoothed = F.where(
                is_positive, class_targets - smooth_weight,
                F.ones_like(class_targets) * smooth_weight)
            class_targets = F.where(is_ignore, class_targets, smoothed)
        class_mask = mask3 * (class_targets >= 0)
        return [F.stop_gradient(t) for t in [objectness, center_targets, scale_targets,
                                             weights, class_targets, class_mask]]
def _spectral_norm(self):
    """Return the weight divided by an estimate of its spectral norm.

    The estimate comes from ``POWER_ITERATION`` rounds of power iteration
    on the weight flattened to 2-D, starting from the stored vector
    ``self.u``. The updated vector is written back to ``self.u``.
    """
    weight = self.params.get('weight').data(self.ctx)
    weight_2d = nd.reshape(weight, [weight.shape[0], -1])

    left = self.u.data(self.ctx)
    right = None
    for _step in range(POWER_ITERATION):
        right = nd.L2Normalization(nd.dot(left, weight_2d))
        left = nd.L2Normalization(nd.dot(right, weight_2d.T))

    projected = nd.dot(left, weight_2d)
    sigma = nd.sum(projected * right)
    # Guard the division below against a zero estimate.
    sigma = EPSILON if sigma == 0. else sigma

    # Persist the iterate without recording the assignment.
    with autograd.pause():
        self.u.set_data(left)

    return weight / sigma
def forward(self, bbox, anchor, width, height):
    """Generate RPN training targets for a single image.

    RPNTargetGenerator is only used in data transform with no batch dimension.
    Be careful there's numpy operations inside

    Parameters
    ----------
    bbox: (M, 4) ground truth boxes with corner encoding.
    anchor: (N, 4) anchor boxes with corner encoding.
    width: int width of input image
    height: int height of input image

    Returns
    -------
    cls_target: (N,) value +1: pos, 0: neg, -1: ignore
    box_target: (N, 4) only anchors whose cls_target > 0 has nonzero box target
    box_mask: (N, 4) only anchors whose cls_target > 0 has nonzero mask

    """
    ops = mx.nd
    with autograd.pause():
        # (N, M) overlaps between every anchor and every ground-truth box
        overlaps = ops.contrib.box_iou(anchor, bbox, format='corner')

        # flag anchors that reach outside the image: one column per anchor,
        # then repeated once per ground-truth box to line up with overlaps
        x_min, y_min, x_max, y_max = ops.split(anchor, num_outputs=4, axis=-1)
        outside = (x_min < 0) + (y_min < 0) + (x_max >= width) + (y_max >= height)
        outside = ops.repeat(outside, repeats=bbox.shape[0], axis=-1)
        overlaps = ops.where(outside, ops.ones_like(overlaps) * -1, overlaps)

        samples, matches = self._sampler(overlaps)

        # training targets for RPN
        cls_target, _ = self._cls_encoder(samples)
        # the box encoder is fed inputs with a leading axis of size 1
        batched = [arr.expand_dims(axis=0)
                   for arr in (samples, matches, anchor, bbox)]
        box_target, box_mask = self._box_encoder(*batched)
    return cls_target, box_target[0], box_mask[0]
def hybrid_forward(self, F, x, gt_box=None):
    """Forward Faster-RCNN network.

    The behavior during training and inference is different.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        Operator namespace supplied by Gluon.
    x : mxnet.nd.NDArray or mxnet.symbol
        The network input tensor.
    gt_box : type, only required during training
        The ground-truth bbox tensor with shape (1, N, 4).

    Returns
    -------
    (ids, scores, bboxes)
        During inference, returns final class id, confidence scores, bounding
        boxes. ``feat`` is appended when ``self._additional_output`` is set.
    (cls_pred, box_pred, rpn_box, samples, matches, raw_rpn_score, raw_rpn_box, anchors)
        During training, returns the raw predictions together with the
        sampled proposals. ``top_feat`` is appended when
        ``self._additional_output`` is set.

    Raises
    ------
    ValueError
        If ``self._roi_mode`` is neither ``'pool'`` nor ``'align'``.

    """
    def _split(x, axis, num_outputs, squeeze_axis):
        # Always hand back a list, even when F.split returns a single array.
        x = F.split(x, axis=axis, num_outputs=num_outputs, squeeze_axis=squeeze_axis)
        if isinstance(x, list):
            return x
        else:
            return [x]

    feat = self.features(x)
    # RPN proposals
    if autograd.is_training():
        rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = \
            self.rpn(feat, F.zeros_like(x))
        # training replaces the proposals by a sampled subset
        rpn_box, samples, matches = self.sampler(rpn_box, rpn_score, gt_box)
    else:
        _, rpn_box = self.rpn(feat, F.zeros_like(x))

    # create batchid for roi
    num_roi = self._num_sample if autograd.is_training() else self._rpn_test_post_nms
    with autograd.pause():
        roi_batchid = F.arange(0, self._max_batch, repeat=num_roi)
        # remove batch dim because ROIPooling require 2d input
        rpn_roi = F.concat(*[roi_batchid.reshape((-1, 1)), rpn_box.reshape((-1, 4))], dim=-1)
        rpn_roi = F.stop_gradient(rpn_roi)

    # ROI features
    if self._roi_mode == 'pool':
        pooled_feat = F.ROIPooling(feat, rpn_roi, self._roi_size, 1. / self._stride)
    elif self._roi_mode == 'align':
        pooled_feat = F.contrib.ROIAlign(feat, rpn_roi, self._roi_size, 1. / self._stride,
                                         sample_ratio=2)
    else:
        raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

    # RCNN prediction
    top_feat = self.top_features(pooled_feat)
    avg_feat = self.global_avg_pool(top_feat)
    cls_pred = self.class_predictor(avg_feat)
    box_pred = self.box_predictor(avg_feat)
    # cls_pred (B * N, C) -> (B, N, C)
    cls_pred = cls_pred.reshape((self._max_batch, num_roi, self.num_class + 1))
    # box_pred (B * N, C * 4) -> (B, N, C, 4)
    box_pred = box_pred.reshape((self._max_batch, num_roi, self.num_class, 4))

    # no need to convert bounding boxes in training, just return
    if autograd.is_training():
        if self._additional_output:
            return (cls_pred, box_pred, rpn_box, samples, matches,
                    raw_rpn_score, raw_rpn_box, anchors, top_feat)
        return (cls_pred, box_pred, rpn_box, samples, matches,
                raw_rpn_score, raw_rpn_box, anchors)

    # cls_ids (B, N, C), scores (B, N, C)
    cls_ids, scores = self.cls_decoder(F.softmax(cls_pred, axis=-1))
    # cls_ids, scores (B, N, C) -> (B, C, N) -> (B, C, N, 1)
    cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
    scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
    # box_pred (B, N, C, 4) -> (B, C, N, 4)
    box_pred = box_pred.transpose((0, 2, 1, 3))

    # rpn_boxes (B, N, 4) -> B * (1, N, 4)
    rpn_boxes = _split(rpn_box, axis=0, num_outputs=self._max_batch, squeeze_axis=False)
    # cls_ids, scores (B, C, N, 1) -> B * (C, N, 1)
    cls_ids = _split(cls_ids, axis=0, num_outputs=self._max_batch, squeeze_axis=True)
    scores = _split(scores, axis=0, num_outputs=self._max_batch, squeeze_axis=True)
    # box_preds (B, C, N, 4) -> B * (C, N, 4)
    box_preds = _split(box_pred, axis=0, num_outputs=self._max_batch, squeeze_axis=True)

    # per batch predict, nms, each class has topk outputs
    results = []
    # note: the loop variables reuse (shadow) the names rpn_box and box_pred
    for rpn_box, cls_id, score, box_pred in zip(rpn_boxes, cls_ids, scores, box_preds):
        # box_pred (C, N, 4) rpn_box (1, N, 4) -> bbox (C, N, 4)
        bbox = self.box_decoder(box_pred, self.box_to_center(rpn_box))
        # res (C, N, 6)
        res = F.concat(*[cls_id, score, bbox], dim=-1)
        # res (C, self.nms_topk, 6)
        res = F.contrib.box_nms(
            res, overlap_thresh=self.nms_thresh, topk=self.nms_topk, valid_thresh=0.0001,
            id_index=0, score_index=1, coord_start=2, force_suppress=True)
        # res (C * self.nms_topk, 6)
        res = res.reshape((-3, 0))
        results.append(res)

    # result B * (C * topk, 6) -> (B, C * topk, 6)
    result = F.stack(*results, axis=0)
    # columns: 0 = class id, 1 = score, 2..5 = box coordinates
    ids = F.slice_axis(result, axis=-1, begin=0, end=1)
    scores = F.slice_axis(result, axis=-1, begin=1, end=2)
    bboxes = F.slice_axis(result, axis=-1, begin=2, end=6)
    if self._additional_output:
        return ids, scores, bboxes, feat
    return ids, scores, bboxes
# Weights for the individual loss terms; their use is not visible in this
# fragment (the excerpt ends inside the debug block below).
positive_weight = 5.0
negative_weight = 0.1
class_weight = 1.0
xywh_weight = 5.0
for epoch in range(maxEpoch):
    trainIter.reset()
    tic = time.time()
    for batchidx, batch in enumerate(trainIter):
        # move labels and inputs to the training device
        Y0 = batch.label[0].as_in_context(ctx)
        X = batch.data[0].as_in_context(ctx)
        with autograd.record():
            Y1 = net(X)
            predCls, predObj, predXYWH = parse_net_output(Y1,numClasses, box_per_cell)
            # NOTE(review): nesting below is reconstructed from a flattened
            # source line -- confirm against the original file.
            with autograd.pause(): #generate ground online
                boxMask, boxCls, boxObj, boxXYWH = parse_groundtruth_for_target(Y0,box_per_cell,predXYWH)
                # disabled debug dump of the per-cell targets (16x16 grid)
                if 0:
                    lines = []
                    for y in range(16):
                        for x in range(16):
                            a = boxMask[0,y,x,0,0].asnumpy()[0]
                            b = boxMask[0,y,x,1,0].asnumpy()[0]
                            c = '-'
                            #pdb.set_trace()
                            if a > 0.5:
                                c = boxXYWH[0,y,x,0,:].asnumpy().tolist()
                                c = ['%.2f'%cc for cc in c]
                                c = '-'.join(c)
                            elif b > 0.5:
                                c = boxXYWH[0,y,x,1,:].asnumpy().tolist()
def hybrid_forward(self, F, img, *x):
    """Forward RPN.

    The behavior during training and inference is different.

    Parameters
    ----------
    img : mxnet.nd.NDArray or mxnet.symbol
        The original input image.
    x : mxnet.nd.NDArray or mxnet.symbol(s)
        Feature tensor(s).

    Returns
    -------
    (rpn_score, rpn_box)
        Returns predicted scores and regions which are candidates of objects.
        In training mode the raw scores, raw boxes and anchors are returned
        as well.

    """
    training = autograd.is_training()
    if training:
        pre_nms, post_nms = self._train_pre_nms, self._train_post_nms
    else:
        pre_nms, post_nms = self._test_pre_nms, self._test_post_nms

    if self._multi_level:
        # One head evaluation per feature level, in [P2, P3, P4, P5, P6] order
        anchors = []
        level_proposals = []
        level_scores = []
        level_boxes = []
        for level, feat in enumerate(x):
            anchor = self.anchor_generator[level](feat)
            rpn_score, rpn_box, raw_score, raw_box = self.rpn_head(feat)
            proposal = self.region_proposer(anchor, rpn_score, rpn_box, img)
            anchors.append(anchor)
            level_proposals.append(proposal)
            level_scores.append(raw_score)
            level_boxes.append(raw_box)
        rpn_pre_nms_proposals = F.concat(*level_proposals, dim=1)
        raw_rpn_scores = F.concat(*level_scores, dim=1)
        raw_rpn_boxes = F.concat(*level_boxes, dim=1)
    else:
        feat = x[0]
        anchors = self.anchor_generator(feat)
        hidden = self.conv1(feat)
        # move channels last, then flatten to (batch, -1, 1) / (batch, -1, 4)
        raw_rpn_scores = self.score(hidden)
        raw_rpn_scores = raw_rpn_scores.transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 1))
        raw_rpn_boxes = self.loc(hidden)
        raw_rpn_boxes = raw_rpn_boxes.transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 4))
        # proposals are built from gradient-free copies of the predictions
        detached_scores = F.sigmoid(F.stop_gradient(raw_rpn_scores))
        detached_boxes = F.stop_gradient(raw_rpn_boxes)
        rpn_pre_nms_proposals = self.region_proposer(
            anchors, detached_scores, detached_boxes, img)

    # Non-maximum suppression
    with autograd.pause():
        suppressed = F.contrib.box_nms(
            rpn_pre_nms_proposals, overlap_thresh=self._nms_thresh, topk=pre_nms,
            coord_start=1, score_index=0, id_index=-1, force_suppress=True)

        # slice post_nms number of boxes
        kept = F.slice_axis(suppressed, axis=1, begin=0, end=post_nms)
        # column 0 is the score, the remaining columns are the box
        rpn_scores = F.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_boxes = F.slice_axis(kept, axis=-1, begin=1, end=None)

    if not training:
        return rpn_scores, rpn_boxes
    # return raw predictions as well in training for bp
    return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
    """Generating training targets that do not require network predictions.

    Parameters
    ----------
    img : mxnet.nd.NDArray
        Original image tensor.
    xs : list of mxnet.nd.NDArray
        List of feature maps.
    anchors : mxnet.nd.NDArray
        YOLO3 anchors.
    offsets : mxnet.nd.NDArray
        Pre-generated x and y offsets for YOLO3.
    gt_boxes : mxnet.nd.NDArray
        Ground-truth boxes.
    gt_ids : mxnet.nd.NDArray
        Ground-truth IDs.
    gt_mixratio : mxnet.nd.NDArray, optional
        Mixup ratio from 0 to 1.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        objectness: 0 for negative, 1 for positive, -1 for ignore.
        center_targets: regression target for center x and y.
        scale_targets: regression target for scale x and y.
        weights: element-wise gradient weights for center_targets and scale_targets.
        class_targets: a one-hot vector for classification.

    Notes
    -----
    ``anchors``, ``offsets`` and ``xs`` must be lists or tuples of equal
    length (one entry per output level); this is checked with assertions.

    """
    assert isinstance(anchors, (list, tuple))
    all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
    assert isinstance(offsets, (list, tuple))
    all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
    # cumulative counts of anchors / offsets up to and including each level
    num_anchors = np.cumsum([a.size // 2 for a in anchors])
    num_offsets = np.cumsum([o.size // 2 for o in offsets])
    # start position of every level inside the concatenated offsets
    _offsets = [0] + num_offsets.tolist()
    assert isinstance(xs, (list, tuple))
    assert len(xs) == len(anchors) == len(offsets)

    # orig image size
    orig_height = img.shape[2]
    orig_width = img.shape[3]
    with autograd.pause():
        # outputs
        # shape_like only serves as a shape template:
        # (batch, number of offsets, number of anchors, 2)
        shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
            (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)
        center_targets = nd.zeros_like(shape_like)
        scale_targets = nd.zeros_like(center_targets)
        weights = nd.zeros_like(center_targets)
        objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
        class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
        class_targets[:] = -1  # prefill -1 for ignores

        # for each ground-truth, find the best matching anchor within the particular grid
        # for instance, center of object 1 reside in grid (3, 4) in (16, 16) feature map
        # then only the anchor in (3, 4) is going to be matched
        gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
        # boxes and anchors are both centred at the origin, so the IoU below
        # compares widths and heights only
        shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
        anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
        shift_anchor_boxes = self.bbox2corner(anchor_boxes)
        ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))
        # real value is required to process, convert to Numpy
        matches = ious.argmax(axis=1).asnumpy()  # (B, M)
        # a ground-truth counts as valid only if all its coordinates are >= 0
        valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
        np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
        np_anchors = all_anchors.asnumpy()
        np_gt_ids = gt_ids.asnumpy()
        np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
        # TODO(zhreshold): the number of valid gt is not a big number, therefore for loop
        # should not be a problem right now. Switch to better solution is needed.
        for b in range(matches.shape[0]):
            for m in range(matches.shape[1]):
                # the first invalid ground-truth ends the scan of this sample
                if valid_gts[b, m] < 1:
                    break
                match = int(matches[b, m])
                # first level whose cumulative anchor count exceeds the match index
                nlayer = np.nonzero(num_anchors > match)[0][0]
                height = xs[nlayer].shape[2]
                width = xs[nlayer].shape[3]
                # from here on gtx/gty/gtw/gth are scalars for this ground-truth
                gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                      np_gtw[b, m, 0], np_gth[b, m, 0])
                # compute the location of the gt centers
                loc_x = int(gtx / orig_width * width)
                loc_y = int(gty / orig_height * height)
                # write back to targets
                index = _offsets[nlayer] + loc_y * width + loc_x
                center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x  # tx
                center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                scale_targets[b, index, match, 0] = np.log(gtw / np_anchors[match, 0])
                scale_targets[b, index, match, 1] = np.log(gth / np_anchors[match, 1])
                # weight decreases as the box covers more of the image
                weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                objectness[b, index, match, 0] = (
                    np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                # replace the ignore prefill by a one-hot row for this cell/anchor
                class_targets[b, index, match, :] = 0
                class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
        # since some stages won't see partial anchors, so we have to slice the correct targets
        objectness = self._slice(objectness, num_anchors, num_offsets)
        center_targets = self._slice(center_targets, num_anchors, num_offsets)
        scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
        weights = self._slice(weights, num_anchors, num_offsets)
        class_targets = self._slice(class_targets, num_anchors, num_offsets)
    return objectness, center_targets, scale_targets, weights, class_targets