def cascade_rcnn(self, F, feature, roi, sampler, gt_box):
    """Run one RCNN stage: pool ROI features, then predict classes and boxes.

    While ``autograd.is_training()`` is true the ROIs are first passed through
    ``sampler`` together with ``gt_box``; otherwise they are used as given.

    Parameters
    ----------
    F :
        Forwarded unchanged to ``self.extract_ROI``.
    feature :
        Feature map to pool from.
    roi :
        ROI regions to be pooled (decoded bbox, per the original notes).
    sampler : callable
        Called as ``sampler(roi, gt_box)`` in training mode and expected to
        return ``(roi, samples, matches)``.
    gt_box :
        Handed to ``sampler``; not used outside training mode.

    Returns
    -------
    tuple
        ``(cls_pred, box_pred, sample_data)``. ``sample_data`` is an ``edict``
        holding ``roi``, ``samples`` and ``matches`` in training mode and
        ``None`` otherwise.
    """
    sample_data = None
    if autograd.is_training():
        roi, samples, matches = sampler(roi, gt_box)
        sample_data = edict()
        sample_data.roi = roi
        sample_data.samples = samples
        sample_data.matches = matches

    pooled = self.extract_ROI(F=F, feature=feature, bbox=roi)
    top = self.top_features(pooled)
    cls_pred = self.class_predictor(top)
    raw_box = self.box_predictor(top)
    # Reshape to (-1, 1, 4) and swap the first two axes.
    box_pred = raw_box.reshape((-1, 1, 4)).transpose((1, 0, 2))
    return cls_pred, box_pred, sample_data
def auto_grad():
    """Differentiate y = 2 * x^T x with respect to the column vector x.

    The analytic gradient is 4x. The input, the training flag (outside and
    inside ``autograd.record()``), the value of y and the gradient are all
    written to ``logger``.

    :return: None
    """
    col = nd.arange(4).reshape((4, 1))
    logger.info("autograd 数组:")
    logger.info(col)

    # attach_grad requests the memory needed to store the gradient.
    col.attach_grad()

    logger.info("autograd.is_training():")
    logger.info(autograd.is_training())

    # record asks MXNet to track the computation involved in the gradient.
    with autograd.record():
        out = 2 * nd.dot(col.T, col)
        logger.info(autograd.is_training())
        logger.info(out)

    # backward computes the gradient automatically.
    out.backward()
    logger.info("autograd 梯度:")
    logger.info(col.grad)
def hybrid_forward(self, F, x):
    """Run the stages and YOLO heads, returning raw predictions or detections.

    In training mode (``autograd.is_training()``) the per-head box centers,
    box scales, objectness and class predictions are each concatenated along
    ``dim=-2`` and returned as a 4-tuple. Otherwise the per-head detections
    are concatenated along ``dim=1``, optionally passed through
    ``F.contrib.box_nms`` and truncated to ``self.post_nms`` entries, and
    returned as ``(ids, scores, bboxes)``.
    """
    training = autograd.is_training()

    centers, scales, objectness, class_preds = [], [], [], []
    anchor_list, offset_list = [], []
    detection_list = []

    # zip stops at the shortest of the three sequences; only the stage is used.
    routes = []
    for stage, _block, _output in zip(self.stages, self.yolo_blocks, self.yolo_outputs):
        x = stage(x)
        routes.append(x)

    for i, block, output in zip(range(len(routes)), self.yolo_blocks, self.yolo_outputs):
        x, tip = block(x)
        if training:
            box_centers, box_scales, objness, class_pred, anchors, offsets = output(tip)
            centers.append(box_centers)
            scales.append(box_scales)
            objectness.append(objness)
            class_preds.append(class_pred)
            anchor_list.append(anchors)
            offset_list.append(offsets)
        else:
            detection_list.append(output(tip))

        if i >= len(routes) - 1:
            break

        # Merge the upsampled feature with the next route, deepest first.
        x = self.transitions[i](x)
        upsample = _upsample(x, stride=2)
        x = F.concat(upsample, routes[::-1][i + 1], dim=1)

    if training:
        # Return raw predictions.
        return (F.concat(*centers, dim=-2),
                F.concat(*scales, dim=-2),
                F.concat(*objectness, dim=-2),
                F.concat(*class_preds, dim=-2))

    result = F.concat(*detection_list, dim=1)
    # Apply NMS per class.
    if 0 < self.nms_thresh < 1:
        result = F.contrib.box_nms(
            result, overlap_thresh=self.nms_thresh, topk=self.nms_topk,
            id_index=0, score_index=1, coord_start=2, force_suppress=False)
        if self.post_nms > 0:
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)

    ids = result.slice_axis(axis=-1, begin=0, end=1)
    scores = result.slice_axis(axis=-1, begin=1, end=2)
    bboxes = result.slice_axis(axis=-1, begin=2, end=None)
    return ids, scores, bboxes
def net(X):
    """Forward pass of an MLP with two ReLU hidden layers and dropout.

    Dropout is applied after each hidden layer only while
    ``autograd.is_training()`` is true. ``num_inputs``, the weights
    ``W1``..``W3``, the biases ``b1``..``b3`` and ``drop_prob1``/``drop_prob2``
    are read from the enclosing scope.
    """
    training = autograd.is_training()
    flat = X.reshape((-1, num_inputs))

    hidden1 = (nd.dot(flat, W1) + b1).relu()
    if training:
        # Drop only during training: after the first fully connected layer.
        hidden1 = dropout(hidden1, drop_prob1)

    hidden2 = (nd.dot(hidden1, W2) + b2).relu()
    if training:
        # Drop only during training: after the second fully connected layer.
        hidden2 = dropout(hidden2, drop_prob2)

    return nd.dot(hidden2, W3) + b3
def net(X):
    """Apply a two-hidden-layer ReLU MLP to ``X`` with training-only dropout.

    Uses the ``np``/``npx`` interfaces already in scope, plus the free names
    ``num_inputs``, ``W1``..``W3``, ``b1``..``b3`` and
    ``drop_prob1``/``drop_prob2``.
    """
    X = X.reshape(-1, num_inputs)

    pre1 = np.dot(X, W1) + b1
    H1 = npx.relu(pre1)
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)

    pre2 = np.dot(H1, W2) + b2
    H2 = npx.relu(pre2)
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)

    logits = np.dot(H2, W3) + b3
    return logits
def net(X):
    """Compute the MLP output for ``X``; dropout is active only in training.

    Each hidden layer is ``relu(dot(prev, W) + b)``; the output layer is a
    plain affine map. Parameters and dropout rates come from module scope.
    """
    inputs = X.reshape((-1, num_inputs))

    H1 = (nd.dot(inputs, W1) + b1).relu()
    H1 = dropout(H1, drop_prob1) if autograd.is_training() else H1

    H2 = (nd.dot(H1, W2) + b2).relu()
    H2 = dropout(H2, drop_prob2) if autograd.is_training() else H2

    return nd.dot(H2, W3) + b3
def net(x):
    """Forward ``x`` through two ReLU hidden layers and a linear output layer.

    Dropout with rates ``drop_prob1`` and ``drop_prob2`` follows the first and
    second hidden layer respectively, but only in training mode.
    """
    in_training = autograd.is_training()
    flat = x.reshape((-1, num_inputs))

    act1 = nd.dot(flat, w1) + b1
    act1 = act1.relu()
    if in_training:
        # Only drop out in training mode.
        act1 = dropout(act1, drop_prob1)

    act2 = nd.dot(act1, w2) + b2
    act2 = act2.relu()
    if in_training:
        # Only drop out in training mode.
        act2 = dropout(act2, drop_prob2)

    return nd.dot(act2, w3) + b3
def net(X):
    """MLP forward pass with a dropout layer after each hidden layer.

    Dropout is used only when training the model
    (``autograd.is_training()``); at prediction time the activations pass
    through unchanged.
    """
    features = X.reshape((-1, num_inputs))

    # First fully connected layer, followed by dropout while training.
    layer1 = (nd.dot(features, W1) + b1).relu()
    if autograd.is_training():
        layer1 = dropout(layer1, drop_prob1)

    # Second fully connected layer, followed by dropout while training.
    layer2 = (nd.dot(layer1, W2) + b2).relu()
    if autograd.is_training():
        layer2 = dropout(layer2, drop_prob2)

    output = nd.dot(layer2, W3) + b3
    return output
def net(x):
    """Forward pass of a dropout MLP over inputs flattened to 784 features.

    Two ReLU hidden layers are each followed by dropout while
    ``autograd.is_training()`` is true; the last layer is linear.
    """
    training = autograd.is_training()
    flat = x.reshape((-1, 784))

    hidden_a = (nd.dot(flat, W1) + b1).relu()
    if training:
        hidden_a = dropout(hidden_a, drop_prob1)

    hidden_b = (nd.dot(hidden_a, W2) + b2).relu()
    if training:
        hidden_b = dropout(hidden_b, drop_prob2)

    return nd.dot(hidden_b, W3) + b3
def net(X):
    """Forward pass of an MLP with two ReLU hidden layers and dropout.

    Dropout is applied after each hidden layer only while
    ``autograd.is_training()`` is true. ``num_inputs``, ``W1``..``W3``,
    ``b1``..``b3`` and ``drop_prob1``/``drop_prob2`` are read from the
    enclosing scope.

    Fixes relative to the previous version:

    * the return value of ``dropout`` is now assigned back; it used to be
      discarded, so dropout had no effect on the forward pass;
    * the second hidden layer now applies ``relu``; without it the last two
      layers collapsed into one affine map;
    * the unused locals ``pro1``/``pro2`` are gone.
    """
    X = X.reshape(-1, num_inputs)

    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():
        # NOTE(review): assumes dropout returns the masked array rather than
        # mutating its argument in place -- confirm against its definition.
        H1 = dropout(H1, drop_prob1)

    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)

    return nd.dot(H2, W3) + b3
def hybrid_forward(self, F, *x):
    """Produce RPN proposals from one or more feature maps.

    For every feature map the head output is decoded against generated
    anchors, clipped, and (when ``self.per_level_nms`` is set) filtered with
    ``F.contrib.box_nms``. The per-level results are concatenated along
    ``dim=1``, passed through a final ``box_nms`` and sliced to ``post_nms``
    entries. The ``pre_nms``/``post_nms`` limits come from the ``train_*`` or
    ``test_*`` attributes depending on ``autograd.is_training()``.

    Returns
    -------
    tuple
        ``(rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors)`` in
        training mode, ``(rpn_scores, rpn_boxes)`` otherwise.
    """
    # Read the flag once, outside any autograd.pause() scope.
    training = autograd.is_training()
    if training:
        pre_nms, post_nms = self.train_pre_nms, self.train_post_nms
    else:
        pre_nms, post_nms = self.test_pre_nms, self.test_post_nms

    anchors = []
    proposals = []
    raw_scores = []
    raw_boxes = []
    for level, feat in enumerate(x):
        # raw_rpn_score (B, HWN, 1)
        # raw_rpn_box (B, HWN, 4)
        rpn_score, rpn_box, raw_rpn_score, raw_rpn_box = self.head(feat)
        with autograd.pause():
            anchor = self.anchor_generator[level](feat)
            # (1, N, 4)
            anchor = anchor.reshape((-1, 4))
            anchors.append(anchor)
            # (B, N, 4)
            rpn_box = self.box_decoder(rpn_box, anchor)
            rpn_box = self.box_clip(rpn_box)
            rpn_pre = F.concat(rpn_score, rpn_box, dim=-1)
            if self.per_level_nms:
                rpn_pre = F.contrib.box_nms(
                    rpn_pre, overlap_thresh=self.nms_thresh,
                    topk=pre_nms // len(x), coord_start=1, score_index=0,
                    id_index=-1)
            proposals.append(rpn_pre)
        raw_scores.append(raw_rpn_score)
        raw_boxes.append(raw_rpn_box)

    rpn_pre_nms_proposals = F.concat(*proposals, dim=1)
    raw_rpn_scores = F.concat(*raw_scores, dim=1)
    raw_rpn_boxes = F.concat(*raw_boxes, dim=1)

    with autograd.pause():
        if self.per_level_nms:
            # Sort the proposals by scores. So the overlap_thresh=2
            nms_thresh, topk = 2, pre_nms + 1
        else:
            nms_thresh, topk = self.nms_thresh, pre_nms
        tmp = F.contrib.box_nms(
            rpn_pre_nms_proposals, overlap_thresh=nms_thresh, topk=topk,
            coord_start=1, score_index=0, id_index=-1)
        result = F.slice_axis(tmp, axis=1, begin=0, end=post_nms)
        rpn_scores = F.slice_axis(result, axis=-1, begin=0, end=1)
        rpn_boxes = F.slice_axis(result, axis=-1, begin=1, end=None)

    if training:
        return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
    return rpn_scores, rpn_boxes
def net(X):
    """Evaluate the dropout MLP on ``X``.

    The network is ``relu -> relu -> linear``. A dropout layer sits after each
    of the two fully connected hidden layers and is used only when the model
    is being trained.
    """
    use_dropout = autograd.is_training()
    flattened = X.reshape((-1, num_inputs))

    first = (nd.dot(flattened, W1) + b1).relu()
    if use_dropout:
        # Dropout layer after the first fully connected layer.
        first = dropout(first, drop_prob1)

    second = (nd.dot(first, W2) + b2).relu()
    if use_dropout:
        # Dropout layer after the second fully connected layer.
        second = dropout(second, drop_prob2)

    return nd.dot(second, W3) + b3
def net(self, X):
    """Forward ``X`` through the instance's dropout MLP.

    Weights (``self.W1``..``self.W3``), biases (``self.b1``..``self.b3``),
    the input width (``self.num_inputs``) and the dropout rates
    (``self.drop_prob1``/``self.drop_prob2``) are read from the instance.
    Dropout runs only while ``autograd.is_training()`` is true.
    """
    training = autograd.is_training()
    flat = X.reshape(-1, self.num_inputs)

    hidden1 = (nd.dot(flat, self.W1) + self.b1).relu()
    if training:
        hidden1 = dropout(hidden1, self.drop_prob1)

    hidden2 = (nd.dot(hidden1, self.W2) + self.b2).relu()
    if training:
        hidden2 = dropout(hidden2, self.drop_prob2)

    return nd.dot(hidden2, self.W3) + self.b3
def test(self):
    """Check that autograd yields the gradient 4x for y = 2 * x^T x.

    Prints the training flag before and inside ``autograd.record()``, asserts
    that ``x.grad`` equals ``4 * x`` exactly, then prints the gradient.
    """
    vec = nd.arange(4).reshape((4, 1))
    vec.attach_grad()
    print(autograd.is_training())

    with autograd.record():
        out = 2 * nd.dot(vec.T, vec)
        print(autograd.is_training())
    out.backward()

    residual = vec.grad - 4 * vec
    assert residual.norm().asscalar() == 0
    print(vec.grad)
def hybrid_forward(self, F, anchor, score, bbox_pred, img):
    """Generate proposals.

    Limited to batch-size=1 in the current implementation (per the original
    note). Boxes are decoded from ``bbox_pred`` and ``anchor``, clipped with
    ``self._clipper``, invalidated when narrower or shorter than
    ``self._min_size``, filtered with ``F.contrib.box_nms`` and sliced to
    ``post_nms`` entries. The NMS limits are the ``_train_*`` attributes in
    training mode and the ``_test_*`` attributes otherwise.

    Returns
    -------
    tuple
        ``(rpn_scores, rpn_bbox)``.
    """
    training = autograd.is_training()
    pre_nms = self._train_pre_nms if training else self._test_pre_nms
    post_nms = self._train_post_nms if training else self._test_post_nms

    with autograd.pause():
        # Restore bounding boxes, then clip them to the image boundary.
        decoded = self._box_decoder(bbox_pred, self._box_to_center(anchor))
        roi = self._clipper(decoded, img)

        # Boxes that do not meet the min_size constraint are overwritten with
        # (-1, -1, -1, -1) below.
        xmin, ymin, xmax, ymax = roi.split(axis=-1, num_outputs=4)
        width = xmax - xmin
        height = ymax - ymin
        # TODO:(zhreshold), there's im_ratio to handle here, but it requires
        # add' info, and we don't expect big difference
        invalid = (width < self._min_size) + (height < self._min_size)

        # Avoid invalid anchors suppressing anchors with 0 confidence.
        score = F.where(invalid, F.ones_like(invalid) * -1, score)
        invalid = F.repeat(invalid, axis=-1, repeats=4)
        roi = F.where(invalid, F.ones_like(invalid) * -1, roi)

        # Non-maximum suppression
        pre = F.concat(score, roi, dim=-1)
        tmp = F.contrib.box_nms(
            pre, overlap_thresh=self._nms_thresh, topk=pre_nms,
            coord_start=1, score_index=0, id_index=-1, force_suppress=True)

        # Keep the first post_nms boxes.
        result = F.slice_axis(tmp, axis=1, begin=0, end=post_nms)
        rpn_scores = F.slice_axis(result, axis=-1, begin=0, end=1)
        rpn_bbox = F.slice_axis(result, axis=-1, begin=1, end=None)

    return rpn_scores, rpn_bbox
def sample_z(self, F, mu, logvar):
    """Return a latent sample in training mode, or ``mu`` otherwise.

    In training mode the result is ``eps * exp(0.5 * logvar) + mu`` with
    ``eps`` drawn via ``F.normal_like``.
    """
    if not autograd.is_training():
        return mu
    std = F.exp(0.5 * logvar)
    noise = F.normal_like(std)
    return (noise * std) + mu
def hybrid_forward(self, F, x: Union[mx.nd.NDArray, mx.sym.Symbol], *args, **kwargs):
    """Chain backbone, neck and head; decode the result outside training.

    In training mode the head output is returned as-is. Otherwise its first
    element is passed to ``self.generate_result``.
    """
    head_out = self.head(self.neck(self.backbone(x)))
    if autograd.is_training():
        return head_out
    return self.generate_result(F, head_out[0])
def feature_detect(self, tag_inputs, word_inputs, bert):
    """Build the embedding input and run it through the bidirectional LSTM.

    Word embeddings come from ``self.pret_word_embs`` (concatenated with
    ``bert`` on ``dim=2`` when ``bert`` is given), or from ``bert`` alone when
    no pretrained embeddings are configured. Tag embeddings are appended on
    ``dim=2`` when ``self.tag_embs`` is set. In training mode the embeddings
    are multiplied by the masks from ``self.generate_emb_mask`` and the LSTM
    input dropout ``self.dropout_lstm_input`` is enabled.

    ``word_inputs.shape`` is read as ``(seq_len, batch_size)``.

    Returns
    -------
    The output of ``biLSTM``.
    """
    is_train = autograd.is_training()
    seq_len, batch_size = word_inputs.shape[0], word_inputs.shape[1]

    if self.pret_word_embs is not None:
        word_embs = self.pret_word_embs(nd.array(word_inputs))
        if bert is not None:
            word_embs = nd.concat(word_embs, nd.array(bert), dim=2)
    else:
        word_embs = nd.array(bert)

    has_tags = self.tag_embs is not None
    tag_embs = self.tag_embs(nd.array(tag_inputs)) if has_tags else None

    # Embedding-level dropout, applied through multiplicative masks.
    if is_train:
        word_mask, tag_mask = self.generate_emb_mask(seq_len, batch_size)
        word_embs = nd.multiply(word_mask, word_embs)
        if has_tags:
            tag_embs = nd.multiply(tag_mask, tag_embs)

    if has_tags:
        emb_inputs = nd.concat(word_embs, tag_embs, dim=2)
    else:
        emb_inputs = word_embs

    lstm_dropout = self.dropout_lstm_input if is_train else 0
    return biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                  dropout_x=lstm_dropout)
def biaffine(self, dep_arc, head_arc, mask, arc_targets):
    """Score arcs with a bilinear layer and, in training, compute the loss.

    ``mask.shape`` is read as ``(seq_len, batch_size)``.

    Returns
    -------
    ``arc_logits`` alone outside training mode; ``(arc_logits, arc_loss)`` in
    training mode, where the loss is the mask-weighted sum of
    ``self.binary_ce_loss`` divided by the mask sum.
    """
    is_train = autograd.is_training()
    seq_len, batch_size = mask.shape[0], mask.shape[1]

    # #head x #dep x batch_size (per the original annotation)
    arc_logits = bilinear(dep_arc, self.arc_W.data(), head_arc,
                          self.mlp_arc_size, seq_len, batch_size,
                          num_outputs=1, bias_x=True, bias_y=False)  # type: nd.NDArray
    if not is_train:
        return arc_logits

    flat_shape = (seq_len, seq_len * batch_size)
    flat_logits = reshape_fortran(arc_logits, flat_shape)
    flat_targets = reshape_fortran(arc_targets, flat_shape)

    losses = self.binary_ce_loss(flat_logits, nd.array(flat_targets))
    flat_mask = nd.array(flatten_numpy(mask))
    arc_loss = nd.sum(losses * flat_mask) / flat_mask.sum()
    return arc_logits, arc_loss
def hybrid_forward(self, F, x):
    """Hybrid forward.

    Computes class predictions, box predictions and anchors for every feature
    level and concatenates them. In training mode the list
    ``[cls_preds, box_preds, anchors]`` is returned. Otherwise boxes and
    classes are decoded, optionally filtered by ``F.contrib.box_nms`` and
    truncated to ``self.post_nms``, and ``(ids, scores, bboxes)`` is returned.
    """
    features = self.features(x)

    def _flat_nhwc(pred):
        # Move the channel axis last, then flatten everything but the batch.
        return F.flatten(F.transpose(pred, (0, 2, 3, 1)))

    cls_preds = [_flat_nhwc(cp(feat))
                 for feat, cp in zip(features, self.class_predictors)]
    box_preds = [_flat_nhwc(bp(feat))
                 for feat, bp in zip(features, self.box_predictors)]
    anchors = [F.reshape(ag(feat), shape=(1, -1))
               for feat, ag in zip(features, self.anchor_generators)]

    cls_preds = F.concat(*cls_preds, dim=1).reshape((0, -1, self.num_classes + 1))
    box_preds = F.concat(*box_preds, dim=1).reshape((0, -1, 4))
    anchors = F.concat(*anchors, dim=1).reshape((1, -1, 4))
    if autograd.is_training():
        return [cls_preds, box_preds, anchors]

    bboxes = self.bbox_decoder(box_preds, anchors)
    cls_ids, scores = self.cls_decoder(F.softmax(cls_preds, axis=-1))

    # One (id, score, box) slab per class, stacked along dim=1.
    per_class = []
    for i in range(self.num_classes):
        cls_id = cls_ids.slice_axis(axis=-1, begin=i, end=i + 1)
        score = scores.slice_axis(axis=-1, begin=i, end=i + 1)
        per_class.append(F.concat(cls_id, score, bboxes, dim=-1))
    result = F.concat(*per_class, dim=1)

    if 0 < self.nms_thresh < 1:
        result = F.contrib.box_nms(
            result, overlap_thresh=self.nms_thresh, topk=self.nms_topk,
            valid_thresh=0.01, id_index=0, score_index=1, coord_start=2,
            force_suppress=False)
        if self.post_nms > 0:
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)

    ids = F.slice_axis(result, axis=2, begin=0, end=1)
    scores = F.slice_axis(result, axis=2, begin=1, end=2)
    bboxes = F.slice_axis(result, axis=2, begin=2, end=6)
    return ids, scores, bboxes
def hybrid_forward(self, F, x, anchors, offsets):
    """Decode the prediction head output into boxes and raw components.

    Returns
    -------
    In training mode the tuple ``(bbox, raw_box_centers, raw_box_scales,
    objness, class_pred, anchors, offsets)``.

    NOTE(review): outside training mode nothing is returned (``None``), and
    the computed class score is never used -- confirm whether an inference
    branch is missing here.
    """
    # x ==> (B, pred per pixel, height*width)
    flat = self.prediction(x).reshape((0, self._num_anchors*self._num_pred, -1))
    pred = F.transpose(flat, (0, 2, 1)).reshape(
        (0, -1, self._num_anchors, self._num_pred))

    # Split the last axis into its components.
    raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
    raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
    objness = pred.slice_axis(axis=-1, begin=4, end=5)
    class_pred = pred.slice_axis(axis=-1, begin=5, end=None)

    # (1, 1, n, n, 2) ==> (1, 1, height, width, 2)
    offsets = F.slice_like(offsets, x*0, axes=(2, 3))
    # (1, 1, height, width, 2) ==> (1, height*width, 1, 2)
    offsets = F.reshape(offsets, (1, -1, 1, 2))

    box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets)*self._strides
    box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
    confidence = F.sigmoid(objness)
    class_score = F.broadcast_mul(confidence, F.sigmoid(class_pred))

    half = box_scales/2
    corners = F.concat(box_centers - half, box_centers + half, dim=-1)
    bbox = F.reshape(corners, (0, -1, 4))

    if autograd.is_training():
        return (bbox, raw_box_centers, raw_box_scales, objness, class_pred,
                anchors, offsets)
    return None
def hybrid_forward(self, F, x, img):
    """Forward RPN. The behavior during training and inference is different.

    Scores and box predictions are passed to the proposal layer through
    ``F.stop_gradient``; the raw tensors are returned separately in training
    mode.

    Parameters
    ----------
    x : mxnet.nd.NDArray or mxnet.symbol
        Feature tensor.
    img : mxnet.nd.NDArray or mxnet.symbol
        The original input image.

    Returns
    -------
    (rpn_score, rpn_box)
        Predicted scores and regions which are candidates of objects. In
        training mode the tuple is extended with
        ``(raw_rpn_scores, rpn_box_pred, anchors)``.
    """
    anchors = self.anchor_generator(x)
    feat = self.conv1(x)

    score_map = self.score(feat).transpose(axes=(0, 2, 3, 1))
    raw_rpn_scores = score_map.reshape((0, -1, 1))
    rpn_scores = F.sigmoid(F.stop_gradient(raw_rpn_scores))

    loc_map = self.loc(feat).transpose(axes=(0, 2, 3, 1))
    rpn_box_pred = loc_map.reshape((0, -1, 4))

    rpn_score, rpn_box = self.region_proposaler(
        anchors, rpn_scores, F.stop_gradient(rpn_box_pred), img)

    outputs = (rpn_score, rpn_box)
    if autograd.is_training():
        # Return raw predictions as well in training for bp.
        outputs = outputs + (raw_rpn_scores, rpn_box_pred, anchors)
    return outputs
def hybrid_forward(self, F, X):
    """Encode ``X``, optionally sample the latent, and decode it.

    Returns
    -------
    tuple
        ``(decoded, latent, logvar)``. Outside training mode ``latent`` is the
        encoder mean; in training mode it is
        ``eps * exp(0.5 * logvar) + mean`` with ``eps`` drawn via
        ``F.random.normal_like``.
    """
    latent, logvar = self.encode(X)
    if autograd.is_training():
        std = F.exp(0.5 * logvar)
        noise = F.random.normal_like(std)
        latent = (noise * std) + latent
    decoded = self.decode(latent)
    return decoded, latent, logvar
def forward(self, input):
    """Encode, apply dropout, decode; mask the output while training.

    In training mode the prediction is multiplied by ``np.sign(input)``;
    otherwise the raw prediction is returned.
    """
    encoded = self.encoder(input)
    pred = self.decoder(self.dropout(encoded))
    if not autograd.is_training():
        return pred
    return pred * np.sign(input)
def forward(self, x):
    """Apply ``self.layer``; add a sigmoid when not training.

    Note that the loss function has the sigmoid operation for better
    numerical stability, so the sigmoid is applied here only at inference.
    """
    out = self.layer(x)
    if autograd.is_training():
        return out
    return nd.sigmoid(out)
def hybrid_forward(self, F, x):
    """Run the stages and output heads, collecting per-head predictions.

    Returns
    -------
    tuple of lists
        ``(all_dets, all_box_centers, all_box_scales, all_objectness,
        all_class_pred, all_anchors, all_offsets)``. The lists are filled only
        in training mode (``autograd.is_training()``); otherwise they are
        returned empty.
    """
    routes = []
    for stage in self.stages:
        x = stage(x)
        routes.append(x)
    num_routes = len(routes)

    all_dets, all_box_centers, all_box_scales = [], [], []
    all_objectness, all_class_pred = [], []
    all_anchors, all_offsets = [], []

    for i, block, output in zip(range(num_routes), self.blocks, self.outputs):
        x, tip = block(x)
        print ('tip shape: {}'.format(tip.shape))
        if autograd.is_training():
            (dets, box_centers, box_scales, objness,
             class_pred, anchors, offsets) = output(tip)
            all_dets.append(dets)
            all_box_centers.append(box_centers)
            all_box_scales.append(box_scales)
            all_objectness.append(objness)
            all_class_pred.append(class_pred)
            all_anchors.append(anchors)
            all_offsets.append(offsets)

        if i >= num_routes - 1:
            break

        # Upsample, crop to the next route on axes (2, 3), and concatenate.
        x = self.transitions[i](x)
        upsample = _upsample(x, stride=2)
        route_now = routes[::-1][i + 1]
        cropped = F.slice_like(upsample, route_now * 0, axes=(2, 3))
        x = F.concat(cropped, route_now, dim=1)

    return (all_dets, all_box_centers, all_box_scales, all_objectness,
            all_class_pred, all_anchors, all_offsets)
def batch_norm(X, gamma, bata, moving_mean, moving_var, eps, momentum):
    """Batch-normalise ``X`` and update the moving statistics.

    The moving statistics are not used for normalisation during training;
    they serve as the mean and variance at inference time. During training,
    the batch mean and variance of ``X`` are computed and used directly. For
    4-D input the statistics are computed per channel (axis 1).

    Fix relative to the previous version: ``moving_var`` is now updated with
    the batch variance. It used to be blended with the batch *mean*, which
    corrupted the variance used at prediction time.

    Parameters
    ----------
    X :
        Input with 2 dimensions (fully connected) or 4 dimensions (2-D
        convolution); other ranks fail the assertion in training mode.
    gamma, bata :
        Scale and shift applied after normalisation.
    moving_mean, moving_var :
        Running statistics; used as-is in prediction mode.
    eps :
        Added to the variance before the square root.
    momentum :
        Weight kept on the previous moving statistics.

    Returns
    -------
    tuple
        ``(Y, moving_mean, moving_var)``; the moving statistics are updated
        only in training mode.
    """
    if not autograd.is_training():
        # Prediction mode: normalise with the moving mean and variance.
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        # Training mode: normalise with the statistics of the current batch.
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: statistics over the feature dimension.
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # 2-D convolution: statistics per channel (axis=1). keepdims
            # preserves the shape so the broadcasts below work.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the moving averages of the mean and the variance.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + bata  # scale and shift
    return Y, moving_mean, moving_var
def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
    """Hybrid forward.

    Outside training mode this defers to ``F.BatchNorm`` with the running
    statistics. In training mode the sum and sum of squares from
    ``F.SumSquare`` are combined through ``AllReduce``, turned into a mean and
    a standard deviation, the running statistics are updated through
    ``self.updater`` with autograd paused, and ``F.DecoupleBatchNorm``
    produces the output.
    """
    if not autograd.is_training():
        return F.BatchNorm(x, gamma, beta, running_mean, running_var,
                           name='fwd', **self._kwargs)

    local_sum, local_sq = F.SumSquare(x)
    # Element count: devices * shape[0] * shape[2] * shape[3].
    count = self.ndevices * x.shape[0] * x.shape[2] * x.shape[3]
    reducer = AllReduce(self._prefix)
    total_sum, total_sq = reducer(local_sum, local_sq)

    # Mean and (biased) standard deviation.
    mean = total_sum / count
    sumvar = total_sq - total_sum * total_sum / count
    biased_var = sumvar / count
    std = F.sqrt(F.maximum(biased_var, self.eps))

    # Update running mean and var with the unbiased variance estimate.
    with autograd.pause():
        unbiased_var = sumvar / (count - 1)
        self.updater(self.running_mean, self.running_var, mean, unbiased_var,
                     self.momentum, x.context)

    return F.DecoupleBatchNorm(x, gamma, beta, mean, std)
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    """Batch-normalise ``X``; update the moving statistics while training.

    ``autograd.is_training()`` decides between training and prediction mode.

    Returns
    -------
    tuple
        ``(Y, moving_mean, moving_var)``. In prediction mode the moving
        statistics are returned unchanged.
    """
    if not autograd.is_training():
        # Prediction mode: normalise directly with the moving-average mean
        # and variance that were passed in.
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
        return gamma * X_hat + beta, moving_mean, moving_var

    ndim = len(X.shape)
    assert ndim in (2, 4)
    if ndim == 2:
        # Two dimensions: the preceding layer is fully connected, so take the
        # mean and variance over the feature dimension (axis 0).
        mean = X.mean(axis=0)
        var = ((X - mean)**2).mean(axis=0)
    else:
        # Otherwise the preceding layer is a 2-D convolution whose input has
        # four dimensions (sample, channel, height, width). Statistics are
        # taken per channel (axis=1 is not reduced); keepdims keeps the shape
        # so the broadcasts below work.
        reduce_axes = (0, 2, 3)
        mean = X.mean(axis=reduce_axes, keepdims=True)
        var = ((X - mean)**2).mean(axis=reduce_axes, keepdims=True)

    # Training mode: normalise with the current mean and variance.
    X_hat = (X - mean) / nd.sqrt(var + eps)
    # Update the moving averages of the mean and the variance.
    keep = momentum
    moving_mean = keep * moving_mean + (1.0 - keep) * mean
    moving_var = keep * moving_var + (1.0 - keep) * var
    return gamma * X_hat + beta, moving_mean, moving_var
def hybrid_forward(self, F, x, img):
    """Forward RPN. The behavior during training and inference is different.

    Parameters
    ----------
    x : mxnet.nd.NDArray or mxnet.symbol
        Feature tensor.
    img : mxnet.nd.NDArray or mxnet.symbol
        The original input image.

    Returns
    -------
    (rpn_score, rpn_box)
        Predicted scores and regions which are candidates of objects. In
        training mode ``raw_rpn_scores``, ``rpn_box_pred`` and ``anchors`` are
        appended to the returned tuple.
    """
    anchors = self.anchor_generator(x)
    hidden = self.conv1(x)

    raw_rpn_scores = self.score(hidden)
    raw_rpn_scores = raw_rpn_scores.transpose(axes=(0, 2, 3, 1))
    raw_rpn_scores = raw_rpn_scores.reshape((0, -1, 1))
    rpn_scores = F.sigmoid(raw_rpn_scores)

    rpn_box_pred = self.loc(hidden)
    rpn_box_pred = rpn_box_pred.transpose(axes=(0, 2, 3, 1))
    rpn_box_pred = rpn_box_pred.reshape((0, -1, 4))

    rpn_score, rpn_box = self.region_proposaler(
        anchors, rpn_scores, rpn_box_pred, img)

    if not autograd.is_training():
        return rpn_score, rpn_box
    # Return raw predictions as well in training for bp.
    return rpn_score, rpn_box, raw_rpn_scores, rpn_box_pred, anchors
def hybrid_forward(self, F, x, **kwargs):
    """Compute multi-level predictions; decode them when not training.

    Training mode returns the list ``[cls_preds, box_preds, anchors]``.
    Otherwise boxes and class scores are decoded, one result slab per class is
    concatenated along ``dim=1``, NMS and the ``post_nms`` cut are applied when
    configured, and ``(ids, scores, bboxes)`` is returned. ``kwargs`` is
    accepted but not used.
    """
    features = self.features(x)

    cls_preds = []
    for feat, predictor in zip(features, self.class_predictors):
        cls_preds.append(F.flatten(F.transpose(predictor(feat), (0, 2, 3, 1))))
    box_preds = []
    for feat, predictor in zip(features, self.box_predictors):
        box_preds.append(F.flatten(F.transpose(predictor(feat), (0, 2, 3, 1))))
    anchors = []
    for feat, generator in zip(features, self.anchor_generators):
        anchors.append(F.reshape(generator(feat), shape=(1, -1)))

    cls_preds = F.concat(*cls_preds, dim=1).reshape((0, -1, self.num_classes + 1))
    box_preds = F.concat(*box_preds, dim=1).reshape((0, -1, 4))
    anchors = F.concat(*anchors, dim=1).reshape((1, -1, 4))

    if autograd.is_training():
        return [cls_preds, box_preds, anchors]

    bboxes = self.bbox_decoder(box_preds, anchors)
    cls_ids, scores = self.cls_decoder(F.softmax(cls_preds, axis=-1))

    # Per-class results: (class id, score, box coordinates) on the last axis.
    results = [
        F.concat(cls_ids.slice_axis(axis=-1, begin=i, end=i+1),
                 scores.slice_axis(axis=-1, begin=i, end=i+1),
                 bboxes,
                 dim=-1)
        for i in range(self.num_classes)
    ]
    result = F.concat(*results, dim=1)

    nms_enabled = self.nms_thresh > 0 and self.nms_thresh < 1
    if nms_enabled:
        result = F.contrib.box_nms(
            result, overlap_thresh=self.nms_thresh, topk=self.nms_topk,
            valid_thresh=0.01, id_index=0, score_index=1, coord_start=2,
            force_suppress=False)
        if self.post_nms > 0:
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)

    ids = F.slice_axis(result, axis=2, begin=0, end=1)
    scores = F.slice_axis(result, axis=2, begin=1, end=2)
    bboxes = F.slice_axis(result, axis=2, begin=2, end=6)
    return ids, scores, bboxes
def hybrid_forward(self, F, x):
    """Run features, deconvolution layers and every head; decode at inference.

    In training mode the list of head outputs is returned for computing the
    loss. Otherwise the first three head outputs are decoded with
    ``symbolic_decode_centernet_pose`` (``K=10``), the result is saved to
    ``symbol-detections.json`` and returned. Progress messages are printed in
    both modes.
    """
    print('F: ', F)
    print('x: ', x)

    x = self.deconv_layers(self.features(x))

    # 2dpose task -> 0: hm, 1: wh, 2: hps, 3: reg, 4: hm_hp, 5:hp_offset
    ret = [self.__getattribute__(head)(x) for head in self.heads]

    if autograd.is_training():
        # During training, just return the tensors for computing the loss.
        print("training mode")
        return ret

    # During inference, decode the output tensors into actual detections.
    print("inference mode")
    detections = symbolic_decode_centernet_pose(
        F, ret[0].sigmoid(), ret[1], ret[2], K=10)
    print("decode finished!")
    detections.save("symbol-detections.json")
    return detections
def hybrid_forward(self, F, x, anchors, offsets):
    """Hybrid Forward of YOLOV3Output layer.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    x : mxnet.nd.NDArray
        Input feature map.
    anchors : mxnet.nd.NDArray
        Anchors loaded from self, no need to supply.
    offsets : mxnet.nd.NDArray
        Offsets loaded from self, no need to supply.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        During training, return (bbox, raw_box_centers, raw_box_scales,
        objness, class_pred, anchors, offsets).
        During inference, return detections.
    """
    # Prediction flat to (batch, pred per pixel, height * width).
    flat = self.prediction(x).reshape(
        (0, self._num_anchors * self._num_pred, -1))
    # Transpose to (batch, height * width, num_anchor, num_pred).
    pred = flat.transpose(axes=(0, 2, 1)).reshape(
        (0, -1, self._num_anchors, self._num_pred))

    # Components along the last axis.
    raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
    raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
    objness = pred.slice_axis(axis=-1, begin=4, end=5)
    class_pred = pred.slice_axis(axis=-1, begin=5, end=None)

    # Valid offsets, (1, 1, height, width, 2).
    offsets = F.slice_like(offsets, x * 0, axes=(2, 3))
    # Reshape to (1, height*width, 1, 2).
    offsets = offsets.reshape((1, -1, 1, 2))

    box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets) * self._stride
    box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
    confidence = F.sigmoid(objness)
    class_score = F.broadcast_mul(F.sigmoid(class_pred), confidence)

    half_size = box_scales / 2.0
    bbox = F.concat(box_centers - half_size, box_centers + half_size, dim=-1)

    if autograd.is_training():
        # During training there is no need to convert everything to
        # detection results.
        return (bbox.reshape((0, -1, 4)), raw_box_centers, raw_box_scales,
                objness, class_pred, anchors, offsets)

    # Prediction per class.
    bboxes = F.tile(bbox, reps=(self._classes, 1, 1, 1, 1))
    scores = F.transpose(class_score, axes=(3, 0, 1, 2)).expand_dims(axis=-1)
    class_range = F.arange(0, self._classes).reshape((0, 1, 1, 1, 1))
    ids = F.broadcast_add(scores * 0, class_range)
    detections = F.concat(ids, scores, bboxes, dim=-1)
    # Reshape to (B, xx, 6).
    detections = F.reshape(detections.transpose(axes=(1, 0, 2, 3, 4)), (0, -1, 6))
    return detections
def hybrid_forward(self, F, x, gt_box=None):
    """Forward Mask RCNN network.

    The behavior during training and inference is different.

    Parameters
    ----------
    x : mxnet.nd.NDArray or mxnet.symbol
        The network input tensor.
    gt_box : type, only required during training
        The ground-truth bbox tensor with shape (1, N, 4).

    Returns
    -------
    (ids, scores, bboxes, masks)
        During inference, returns final class id, confidence scores, bounding
        boxes, segmentation masks. During training, returns the parent
        network's outputs with ``mask_pred`` inserted after ``box_pred`` and
        without the trailing ``top_feat``.
    """
    if autograd.is_training():
        (cls_pred, box_pred, rpn_box, samples, matches,
         raw_rpn_score, raw_rpn_box, anchors, top_feat) = \
            super(MaskRCNN, self).hybrid_forward(F, x, gt_box)
        mask_pred = self.mask(top_feat)
        return (cls_pred, box_pred, mask_pred, rpn_box, samples, matches,
                raw_rpn_score, raw_rpn_box, anchors)

    ids, scores, boxes, feat = super(MaskRCNN, self).hybrid_forward(F, x)

    # (B, N * (C - 1), 1) -> (B, N * (C - 1)) -> (B, topk)
    num_rois = self._rcnn_max_dets
    max_batch = self._max_batch
    order = F.argsort(scores.squeeze(axis=-1), axis=1, is_ascend=False)
    topk = F.slice_axis(order, axis=1, begin=0, end=num_rois)

    # Pick from (B, N * (C - 1), X) to (B * topk, X) -> (B, topk, X).
    roi_batch_id = F.repeat(F.arange(0, max_batch), num_rois)
    indices = F.stack(roi_batch_id, topk.reshape((-1,)), axis=0)
    ids = F.gather_nd(ids, indices).reshape((-4, max_batch, num_rois, 1))
    scores = F.gather_nd(scores, indices).reshape((-4, max_batch, num_rois, 1))
    boxes = F.gather_nd(boxes, indices).reshape((-4, max_batch, num_rois, 4))

    # Create batch id and reshape for roi pooling.
    padded_rois = F.concat(roi_batch_id.reshape((-1, 1)),
                           boxes.reshape((-3, 0)), dim=-1)
    padded_rois = F.stop_gradient(padded_rois)

    # Pool to roi features.
    if self.num_stages > 1:
        # Using FPN.
        pooled_feat = self._pyramid_roi_feats(
            F, feat, padded_rois, self._roi_size, self._strides,
            roi_mode=self._roi_mode)
    elif self._roi_mode == 'pool':
        pooled_feat = F.ROIPooling(
            feat[0], padded_rois, self._roi_size, 1. / self._strides)
    elif self._roi_mode == 'align':
        pooled_feat = F.contrib.ROIAlign(
            feat[0], padded_rois, self._roi_size, 1. / self._strides,
            sample_ratio=2)
    else:
        raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

    # Run top_features again.
    if self.top_features is not None:
        top_feat = self.top_features(pooled_feat)
    else:
        top_feat = pooled_feat

    # (B, N, C, pooled_size * 2, pooled_size * 2)
    rcnn_mask = self.mask(top_feat)

    # Index the B dimension (B * N,).
    batch_ids = F.repeat(F.arange(0, max_batch), num_rois)
    # Index the N dimension (B * N,).
    roi_ids = F.tile(F.arange(0, num_rois), reps=max_batch)
    # Index the C dimension (B * N,), clipped to [0, num_class].
    class_ids = F.clip(ids.reshape((-1,)), 0, self.num_class)

    # Pick from (B, N, C, PS*2, PS*2) -> (B * N, PS*2, PS*2).
    indices = F.stack(batch_ids, roi_ids, class_ids, axis=0)
    masks = F.gather_nd(rcnn_mask, indices)
    # (B * N, PS*2, PS*2) -> (B, N, PS*2, PS*2)
    masks = masks.reshape((-4, max_batch, num_rois, 0, 0))
    # Output probabilities.
    masks = F.sigmoid(masks)

    # ids (B, N, 1), scores (B, N, 1), boxes (B, N, 4), masks (B, N, PS*2, PS*2)
    return ids, scores, boxes, masks
def hybrid_forward(self, F, x, gt_box=None):
    """Forward Faster-RCNN network.

    The behavior during training and inference is different.

    Parameters
    ----------
    x : mxnet.nd.NDArray or mxnet.symbol
        The network input tensor.
    gt_box : type, only required during training
        The ground-truth bbox tensor with shape (1, N, 4).

    Returns
    -------
    (ids, scores, bboxes)
        During inference, returns final class id, confidence scores, bounding
        boxes (plus ``feat`` when ``self._additional_output`` is set). During
        training, returns ``(cls_pred, box_pred, rpn_box, samples, matches,
        raw_rpn_score, raw_rpn_box, anchors)`` (plus ``top_feat`` when
        ``self._additional_output`` is set).
    """
    def _split(x, axis, num_outputs, squeeze_axis):
        # F.split may return a single item rather than a list; normalise.
        parts = F.split(x, axis=axis, num_outputs=num_outputs,
                        squeeze_axis=squeeze_axis)
        return parts if isinstance(parts, list) else [parts]

    # Read the flag once, outside any autograd.pause() scope.
    training = autograd.is_training()
    max_batch = self._max_batch

    feat = self.features(x)

    # RPN proposals
    if training:
        rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = \
            self.rpn(feat, F.zeros_like(x))
        rpn_box, samples, matches = self.sampler(rpn_box, rpn_score, gt_box)
    else:
        _, rpn_box = self.rpn(feat, F.zeros_like(x))

    # Create batchid for roi.
    num_roi = self._num_sample if training else self._rpn_test_post_nms
    with autograd.pause():
        roi_batchid = F.arange(0, max_batch, repeat=num_roi)
        # Remove batch dim because ROIPooling requires 2d input.
        rpn_roi = F.concat(roi_batchid.reshape((-1, 1)),
                           rpn_box.reshape((-1, 4)), dim=-1)
        rpn_roi = F.stop_gradient(rpn_roi)

    # ROI features
    if self._roi_mode == 'pool':
        pooled_feat = F.ROIPooling(feat, rpn_roi, self._roi_size,
                                   1. / self._stride)
    elif self._roi_mode == 'align':
        pooled_feat = F.contrib.ROIAlign(feat, rpn_roi, self._roi_size,
                                         1. / self._stride, sample_ratio=2)
    else:
        raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

    # RCNN prediction
    top_feat = self.top_features(pooled_feat)
    avg_feat = self.global_avg_pool(top_feat)
    cls_pred = self.class_predictor(avg_feat)
    box_pred = self.box_predictor(avg_feat)
    # cls_pred (B * N, C) -> (B, N, C)
    cls_pred = cls_pred.reshape((max_batch, num_roi, self.num_class + 1))
    # box_pred (B * N, C * 4) -> (B, N, C, 4)
    box_pred = box_pred.reshape((max_batch, num_roi, self.num_class, 4))

    # No need to convert bounding boxes in training, just return.
    if training:
        outputs = (cls_pred, box_pred, rpn_box, samples, matches,
                   raw_rpn_score, raw_rpn_box, anchors)
        if self._additional_output:
            outputs = outputs + (top_feat,)
        return outputs

    # cls_ids (B, N, C), scores (B, N, C)
    cls_ids, scores = self.cls_decoder(F.softmax(cls_pred, axis=-1))
    # cls_ids, scores (B, N, C) -> (B, C, N) -> (B, C, N, 1)
    cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
    scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
    # box_pred (B, N, C, 4) -> (B, C, N, 4)
    box_pred = box_pred.transpose((0, 2, 1, 3))

    # rpn_boxes (B, N, 4) -> B * (1, N, 4)
    rpn_boxes = _split(rpn_box, axis=0, num_outputs=max_batch, squeeze_axis=False)
    # cls_ids, scores (B, C, N, 1) -> B * (C, N, 1)
    cls_ids = _split(cls_ids, axis=0, num_outputs=max_batch, squeeze_axis=True)
    scores = _split(scores, axis=0, num_outputs=max_batch, squeeze_axis=True)
    # box_preds (B, C, N, 4) -> B * (C, N, 4)
    box_preds = _split(box_pred, axis=0, num_outputs=max_batch, squeeze_axis=True)

    # Per batch: predict, nms; each class has topk outputs.
    results = []
    for one_rpn_box, cls_id, score, one_box_pred in zip(
            rpn_boxes, cls_ids, scores, box_preds):
        # box_pred (C, N, 4) rpn_box (1, N, 4) -> bbox (C, N, 4)
        bbox = self.box_decoder(one_box_pred, self.box_to_center(one_rpn_box))
        # res (C, N, 6)
        res = F.concat(cls_id, score, bbox, dim=-1)
        # res (C, self.nms_topk, 6)
        res = F.contrib.box_nms(
            res, overlap_thresh=self.nms_thresh, topk=self.nms_topk,
            valid_thresh=0.0001, id_index=0, score_index=1, coord_start=2,
            force_suppress=True)
        # res (C * self.nms_topk, 6)
        results.append(res.reshape((-3, 0)))

    # result B * (C * topk, 6) -> (B, C * topk, 6)
    result = F.stack(*results, axis=0)
    ids = F.slice_axis(result, axis=-1, begin=0, end=1)
    scores = F.slice_axis(result, axis=-1, begin=1, end=2)
    bboxes = F.slice_axis(result, axis=-1, begin=2, end=6)
    if self._additional_output:
        return ids, scores, bboxes, feat
    return ids, scores, bboxes
def hybrid_forward(self, F, img, *x):
    """Produce region proposals from one or several feature maps.

    The pre-/post-NMS proposal budgets and the returned tuple both depend
    on whether autograd is currently in training mode.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        Operator namespace supplied by Gluon.
    img : mxnet.nd.NDArray or mxnet.symbol
        The original input image; it is forwarded to the region proposer.
    x : mxnet.nd.NDArray or mxnet.symbol(s)
        Feature tensor(s). In multi-level mode every entry is paired with
        the anchor generator of the same index; otherwise only ``x[0]``
        is used.

    Returns
    -------
    tuple
        ``(rpn_scores, rpn_boxes)`` at inference time. The proposals are
        cut to the post-NMS budget along axis 1; scores are element 0 of
        the last axis and boxes are the remaining elements.
        In training mode the tuple is extended to
        ``(rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors)``
        so the raw predictions can be used for back-propagation.
        ``anchors`` is a list with one entry per level in multi-level
        mode, otherwise the single anchor generator output.

    """
    training = autograd.is_training()
    if training:
        nms_topk, num_keep = self._train_pre_nms, self._train_post_nms
    else:
        nms_topk, num_keep = self._test_pre_nms, self._test_post_nms

    if self._multi_level:
        # Anchors are generated in [P2, P3, P4, P5, P6] order.
        anchors = []
        level_proposals = []
        level_raw_scores = []
        level_raw_boxes = []
        for level, feat in enumerate(x):
            level_anchor = self.anchor_generator[level](feat)
            score, box, raw_score, raw_box = self.rpn_head(feat)
            level_proposals.append(
                self.region_proposer(level_anchor, score, box, img))
            anchors.append(level_anchor)
            level_raw_scores.append(raw_score)
            level_raw_boxes.append(raw_box)
        proposals = F.concat(*level_proposals, dim=1)
        raw_rpn_scores = F.concat(*level_raw_scores, dim=1)
        raw_rpn_boxes = F.concat(*level_raw_boxes, dim=1)
    else:
        feat = x[0]
        anchors = self.anchor_generator(feat)
        hidden = self.conv1(feat)
        # Move channels last, then flatten every spatial position/anchor
        # into axis 1: one score and four box values per entry.
        score_map = self.score(hidden).transpose(axes=(0, 2, 3, 1))
        raw_rpn_scores = score_map.reshape((0, -1, 1))
        detached_scores = F.sigmoid(F.stop_gradient(raw_rpn_scores))
        box_map = self.loc(hidden).transpose(axes=(0, 2, 3, 1))
        raw_rpn_boxes = box_map.reshape((0, -1, 4))
        detached_boxes = F.stop_gradient(raw_rpn_boxes)
        proposals = self.region_proposer(
            anchors, detached_scores, detached_boxes, img)

    # Non-maximum suppression is run with autograd paused.
    with autograd.pause():
        suppressed = F.contrib.box_nms(
            proposals,
            overlap_thresh=self._nms_thresh,
            topk=nms_topk,
            coord_start=1,
            score_index=0,
            id_index=-1,
            force_suppress=True)
        # Keep only the first ``num_keep`` proposals along axis 1.
        kept = F.slice_axis(suppressed, axis=1, begin=0, end=num_keep)
        rpn_scores = F.slice_axis(kept, axis=-1, begin=0, end=1)
        rpn_boxes = F.slice_axis(kept, axis=-1, begin=1, end=None)

    if not training:
        return rpn_scores, rpn_boxes
    # Raw predictions are returned as well in training for back-prop.
    return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
    """Score arcs and relations for a batch and, outside training, decode parses.

    The method embeds words and tags, runs them through the bi-LSTM and the
    dep/head MLPs, computes biaffine arc and relation logits, and then either
    returns the training loss or decodes one parse per sentence.

    Parameters
    ----------
    word_inputs : mxnet.ndarray.NDArray
        word indices of seq_len x batch_size
        NOTE(review): the body applies ``np.greater`` / ``np.where`` to this
        argument directly, so it presumably arrives as a numpy array - confirm
        against the caller.
    tag_inputs : mxnet.ndarray.NDArray
        tag indices of seq_len x batch_size
    arc_targets : mxnet.ndarray.NDArray
        gold arc indices of seq_len x batch_size
    rel_targets : mxnet.ndarray.NDArray
        gold rel indices of seq_len x batch_size

    Returns
    -------
    tuple
        * In training mode (``autograd.is_training()``):
          ``(arc_accuracy, rel_accuracy, overall_accuracy, loss)``.
        * Outside training with ``arc_targets`` given:
          ``(arc_accuracy, rel_accuracy, overall_accuracy, outputs)``.
        * Otherwise just ``outputs``.

        ``outputs`` is a list with one ``(arcs, rels)`` pair per sentence.

    Notes
    -----
    The accuracies and losses are divided by ``num_tokens`` without a guard,
    so a batch in which no position passes the mask is not handled specially.
    """
    is_train = autograd.is_training()

    def flatten_numpy(ndarray):
        """Flatten an array to one dimension in column-major (Fortran) order.

        Parameters
        ----------
        ndarray : numpy.ndarray
            input tensor

        Returns
        -------
        numpy.ndarray
            A 1-D array of shape ``(-1,)``
        """
        return np.reshape(ndarray, (-1,), 'F')

    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    # 1.0 wherever the word id is strictly greater than the ROOT id, else 0.0
    mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
    num_tokens = int(np.sum(mask))  # non padding, non root token number

    # The flattened mask is only needed when accuracies/losses are computed.
    if is_train or arc_targets is not None:
        mask_1D = flatten_numpy(mask)
        mask_1D_tensor = nd.array(mask_1D)

    # Word ids at or above `words_in_train` are replaced by the UNK id before
    # the trainable embedding lookup; the pretrained lookup (if any) below
    # still receives the original ids.
    unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
    word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
    if self.pret_word_embs:
        word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
    tag_embs = self.tag_embs(nd.array(tag_inputs))

    # Dropout
    emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

    # Input dropout for the LSTM is explicitly disabled outside training.
    top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                       dropout_x=self.dropout_lstm_input if is_train else 0)
    # NOTE(review): nd.Dropout is called unconditionally here and below;
    # presumably it is inactive outside training mode - confirm against the
    # MXNet Dropout operator's default `mode`.
    top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

    # Dependent-side and head-side MLPs share the same recurrent features.
    W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
    W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
    dep, head = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep), leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
    dep, head = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp), nd.Dropout(data=head, axes=[0], p=self.dropout_mlp)
    # Bring the feature axis to the front so it can be split by slicing.
    dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
    # The first `mlp_arc_size` features feed the arc scorer, the rest the
    # relation scorer.
    dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
    head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

    W_arc = self.arc_W.data()
    arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                          bias_x=True, bias_y=False)
    # (#head x #dep) x batch_size

    flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
    # (#head ) x (#dep x batch_size)

    arc_preds = arc_logits.argmax(0)
    # seq_len x batch_size

    if is_train or arc_targets is not None:
        correct = np.equal(arc_preds.asnumpy(), arc_targets)
        arc_correct = correct.astype(np.float32) * mask
        arc_accuracy = np.sum(arc_correct) / num_tokens
        # targets_1D holds the gold arcs here; it is overwritten with the
        # gold relations further down.
        targets_1D = flatten_numpy(arc_targets)
        losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
        arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    # Arc probabilities are only needed for decoding, i.e. outside training.
    if not is_train:
        arc_probs = np.transpose(
            np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

    W_rel = self.rel_W.data()
    rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                          num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
    # (#head x rel_size x #dep) x batch_size

    flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
    # (#head x rel_size) x (#dep x batch_size)

    # Relation logits are picked at the gold heads in training and at the
    # predicted heads otherwise.
    _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
        seq_len * batch_size, 1)
    # Repeat each head index across all relation classes for nd.pick.
    _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

    partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
    # (rel_size) x (#dep x batch_size)

    if is_train or arc_targets is not None:
        rel_preds = partial_rel_logits.argmax(0)
        # From here on targets_1D refers to the gold relations.
        targets_1D = flatten_numpy(rel_targets)
        rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
        rel_accuracy = np.sum(rel_correct) / num_tokens
        losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
        rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

    # Relation probabilities are only needed for decoding.
    if not is_train:
        rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                            (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

    if is_train or arc_targets is not None:
        loss = arc_loss + rel_loss
        # A token counts as fully correct only if both arc and relation match.
        correct = rel_correct * flatten_numpy(arc_correct)
        overall_accuracy = np.sum(correct) / num_tokens

    if is_train:
        return arc_accuracy, rel_accuracy, overall_accuracy, loss

    outputs = []

    for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
        # parse sentences one by one
        # Position 0 (presumably the root slot - confirm) is switched on so
        # that it is counted in sent_len.
        msk[0] = 1.
        sent_len = int(np.sum(msk))
        arc_pred = arc_argmax(arc_prob, sent_len, msk)
        # Keep, for every dependent, the relation scores at its chosen head.
        rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
        rel_pred = rel_argmax(rel_prob, sent_len)
        # Drop position 0 and everything past the sentence length.
        outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

    if arc_targets is not None:
        return arc_accuracy, rel_accuracy, overall_accuracy, outputs
    return outputs
def hybrid_forward(self, F, x, *args):
    """Run the YOLOV3 network on a batch.

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    x : mxnet.nd.NDArray
        Input data.
    *args : optional, mxnet.nd.NDArray
        During training, extra inputs are required:
        (gt_boxes, obj_t, centers_t, scales_t, weights_t, clas_t)
        These are generated by YOLOV3PrefetchTargetGenerator in dataloader
        transform function. They are forwarded to the target generator
        together with the concatenated detections.

    Returns
    -------
    (tuple of) mxnet.nd.NDArray
        * Inference: ``(ids, scores, bboxes)``, sliced from the last axis
          of the (optionally NMS-filtered) detections as elements
          ``[0:1]``, ``[1:2]`` and ``[2:]``.
        * Training while autograd is recording: whatever ``self._loss``
          returns for the predictions and the generated targets.
        * Training while not recording (used by the DataLoader transform
          function): ``(detections, anchors, offsets, feat_maps,
          box_centers, box_scales, objectness, class_pred)`` where
          ``anchors``, ``offsets`` and ``feat_maps`` are per-output lists
          and the rest are concatenated along axis 1.

    """
    training = autograd.is_training()

    all_box_centers, all_box_scales = [], []
    all_objectness, all_class_pred = [], []
    all_anchors, all_offsets, all_feat_maps = [], [], []
    all_detections = []

    # Run the backbone stages and remember every intermediate output.
    # The zip bounds the number of stages by the number of YOLO heads.
    routes = []
    for stage, _block, _output in zip(
            self.stages, self.yolo_blocks, self.yolo_outputs):
        x = stage(x)
        routes.append(x)
    deepest_first = routes[::-1]
    last_index = len(routes) - 1

    # the YOLO output layers are used in reverse order, i.e., from very
    # deep layers to shallow
    for i, block, output in zip(
            range(len(routes)), self.yolo_blocks, self.yolo_outputs):
        x, tip = block(x)
        if training:
            (dets, box_centers, box_scales, objness,
             class_pred, anchors, offsets) = output(tip)
            all_box_centers.append(box_centers.reshape((0, -3, -1)))
            all_box_scales.append(box_scales.reshape((0, -3, -1)))
            all_objectness.append(objness.reshape((0, -3, -1)))
            all_class_pred.append(class_pred.reshape((0, -3, -1)))
            all_anchors.append(anchors)
            all_offsets.append(offsets)
            # a fake feature map keeps memory low; only shape[2, 3] is used
            first_sample = tip.slice_axis(axis=0, begin=0, end=1)
            first_channel = first_sample.slice_axis(axis=1, begin=0, end=1)
            all_feat_maps.append(F.zeros_like(first_channel))
        else:
            dets = output(tip)
        all_detections.append(dets)
        if i >= last_index:
            break
        # transition layer, then upsample towards the shallower route
        x = self.transitions[i](x)
        upsample = _upsample(x, stride=2)
        route_now = deepest_first[i + 1]
        # crop the upsampled map to the route's spatial size before merging
        cropped = F.slice_like(upsample, route_now * 0, axes=(2, 3))
        x = F.concat(cropped, route_now, dim=1)

    if training:
        # during training detection results are not the final product
        if autograd.is_recording():
            # generate losses and return them directly
            box_preds = F.concat(*all_detections, dim=1)
            all_preds = [
                F.concat(*parts, dim=1)
                for parts in (all_objectness, all_box_centers,
                              all_box_scales, all_class_pred)
            ]
            all_targets = self._target_generator(box_preds, *args)
            return self._loss(*(all_preds + all_targets))

        # raw predictions, only used in DataLoader transform function
        detections = F.concat(*all_detections, dim=1)
        centers = F.concat(*all_box_centers, dim=1)
        scales = F.concat(*all_box_scales, dim=1)
        objectness = F.concat(*all_objectness, dim=1)
        class_pred = F.concat(*all_class_pred, dim=1)
        return (detections, all_anchors, all_offsets, all_feat_maps,
                centers, scales, objectness, class_pred)

    # concat all detection results from different stages
    result = F.concat(*all_detections, dim=1)
    # apply nms per class
    if 0 < self.nms_thresh < 1:
        result = F.contrib.box_nms(
            result,
            overlap_thresh=self.nms_thresh,
            valid_thresh=0.01,
            topk=self.nms_topk,
            id_index=0,
            score_index=1,
            coord_start=2,
            force_suppress=False)
        if self.post_nms > 0:
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
    ids = result.slice_axis(axis=-1, begin=0, end=1)
    scores = result.slice_axis(axis=-1, begin=1, end=2)
    bboxes = result.slice_axis(axis=-1, begin=2, end=None)
    return ids, scores, bboxes