def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True,
):
    """Check quantized batch_conv_bias_activation against a float reference.

    Builds random input/weight/bias, quantizes them (qint8 inputs/weights,
    qint32 bias), runs the fused quantized batch-conv, and compares against
    F.conv2d on the dequantized float copies, re-quantized to out_dtype.

    Args:
        N, IC, OC: batch, input-channel and output-channel counts.
        IH, IW / KH, KW / PH, PW / SH, SW: input, kernel, padding, stride
            height/width.
        has_bias: when False the quantized op receives a zero bias and the
            reference conv gets no bias.

    NOTE(review): relies on enclosing-scope names (`dtype`, `inp_dtype`,
    `w_dtype`, `b_dtype`, `out_dtype`, `outp_scale`, `tensor`, `Parameter`,
    `F`) — this is an inner test helper; confirm against the outer test.
    """
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    # weight carries a per-sample (batch) dimension: batch convolution
    w_v = np.random.normal(size=(N, OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))
    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)
    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)
    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)
    # float copies of the *quantized* values, so both paths see identical data
    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def run_batch_conv_bias(inp, w, b):
        # zero bias keeps the op signature identical when has_bias is off
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        result = F.quantized.batch_conv_bias_activation(
            inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype,
        )
        return result.astype("float32")

    # reference: plain conv2d on sample 0's weights, then round-trip through
    # out_dtype so quantization error matches the fused op's
    expected = F.conv2d(inp_fp32, w_fp32[0], b_fp32 if has_bias else None)[0]
    expected = expected.astype(out_dtype).astype("float32")
    expected = F.flatten(expected)
    result = run_batch_conv_bias(inp_int8, w_int8, b_int32)
    result = F.flatten(result)
    # tolerance of one output quantization step
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
def forward(self, xin, labels=None, imgs=None):
    """YOLOX detection head forward pass, inference only.

    Args:
        xin: per-level FPN feature maps.
        labels, imgs: unused here (inference); kept for interface parity
            with the training-capable head.

    Returns:
        Decoded detections when ``self.decode_in_inference`` is set,
        otherwise the raw (batch, n_anchors_all, 5 + num_classes) tensor.
    """
    outputs = []
    # this variant of the head only supports eval mode
    assert not self.training
    for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(
            zip(self.cls_convs, self.reg_convs, self.strides, xin)):
        x = self.stems[k](x)
        cls_x = x
        reg_x = x
        cls_feat = cls_conv(cls_x)
        cls_output = self.cls_preds[k](cls_feat)
        reg_feat = reg_conv(reg_x)
        reg_output = self.reg_preds[k](reg_feat)
        obj_output = self.obj_preds[k](reg_feat)
        # box regression stays raw; objectness and class scores are
        # sigmoid-squashed since no loss needs the logits at inference
        output = F.concat(
            [reg_output, F.sigmoid(obj_output), F.sigmoid(cls_output)], 1)
        outputs.append(output)
    # remember per-level spatial sizes for decode_outputs
    self.hw = [x.shape[-2:] for x in outputs]
    # [batch, n_anchors_all, 85]
    outputs = F.concat([F.flatten(x, start_axis=2) for x in outputs],
                       axis=2)
    outputs = F.transpose(outputs, (0, 2, 1))
    if self.decode_in_inference:
        return self.decode_outputs(outputs)
    else:
        return outputs
def _get_mat3x3(self, image):
    """Predict the 3x3 perspective matrix used by the spatial transform.

    A perspective transform has only 8 degrees of freedom even though the
    returned matrix has 9 entries.

    Args:
        image (Tensor): input images (shape: n * 3 * 112 * 112).

    Returns:
        Tensor: per-sample perspective matrices (shape: n * 3 * 3).
    """
    feat = self.stem(image)
    feat = F.avg_pool2d(feat, 7)
    feat = F.flatten(feat, 1)
    feat = self.fc(feat)

    size = self.input_size
    # Damp the raw fc output by 0.01 (magic number) so the transform stays
    # near identity early in training.
    residual = feat.reshape(-1, 3, 3) * 0.01

    identity = mge.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]).astype("float32")
    identity = F.broadcast_to(identity, residual.shape)

    pre_scale = mge.tensor([[size, 0, 0], [0, size, 0], [0, 0, 1]]).astype("float32")
    pre_scale = F.broadcast_to(pre_scale, residual.shape)

    post_scale = mge.tensor(
        [[1 / size, 0, 0], [0, 1 / size, 0], [0, 0, 1]]).astype("float32")
    post_scale = F.broadcast_to(post_scale, residual.shape)

    # conjugate the (identity + residual) transform by the input scale
    return F.matmul(pre_scale, F.matmul(identity + residual, post_scale))
def forward(self, x):
    """Classify a batch: stem -> features -> 7x7 avg-pool -> flatten -> fc."""
    out = self.stem(x)
    out = self.features(out)
    out = F.avg_pool2d(out, 7)
    out = F.flatten(out, 1)
    return self.classifier(out)
def forward(self, x):
    """Head: batch-norm, dropout, flatten, fully-connected, batch-norm."""
    out = self.bn1(x)
    out = self.dropout(out)
    out = F.flatten(out, 1)
    out = self.fc(out)
    return self.bn2(out)
def forward(self, inputs): out_l1 = self.level1(inputs) if not self.reinf: del inputs inputs = None out_l2 = self.level2(out_l1, inputs) out_l3_0 = self.level3_0(out_l2, inputs) for i, layer in enumerate(self.level3): if i == 0: out_l3 = layer(out_l3_0) else: out_l3 = layer(out_l3) outl4_0 = self.level4_0(out_l3, inputs) for i, layer in enumerate(self.level4): if i == 0: out_l4 = layer(outl4_0) else: out_l4 = layer(out_l4) outl5_0 = self.level5_0(out_l4, inputs) for i, layer in enumerate(self.level5): if i == 0: out_l5 = layer(outl5_0) else: out_l5 = layer(out_l5) net = F.adaptive_avg_pool2d(out_l5, 1) net = F.flatten(net, 1) net = self.classifier(net) return net
def forward(self, x):
    """Optionally flatten the input, then apply the linear and bias layers."""
    out = F.flatten(x, 1) if self.mode == "flatten" else x
    out = self.linear(out)
    return self.linear_bias(out)
def forward(self, x):
    """Extract features and either return them raw or classify.

    When ``self.features_only`` is set the pooling/classifier head is
    skipped entirely.
    """
    feat = self.features(x)
    if self.features_only:
        return feat
    feat = self.global_avg(feat)
    feat = F.flatten(feat, 1)
    return self.classifier(feat)
def forward(self, x):
    """Head: bn -> dropout -> avg-pool(self.size) -> flatten -> fc -> bn."""
    out = self.bn1(x)
    out = self.dropout(out)
    out = F.avg_pool2d(out, self.size)
    out = F.flatten(out, 1)
    out = self.fc(out)
    return self.bn2(out)
def forward(self, x):
    """Classify from the "res3" feature map: 7x7 avg-pool, flatten, fc."""
    feat = self.extract_features(x)["res3"]
    feat = F.avg_pool2d(feat, 7)
    feat = F.flatten(feat, 1)
    return self.fc(feat)
def forward(self, x):
    """Inception-v3 forward pass.

    Returns logits, or ``(logits, aux_logits)`` when training with the
    auxiliary head enabled.
    """
    x = self._transform_input(x)
    # Stem and mixed blocks up to Mixed_6e, applied strictly in order:
    # N x 3 x 299 x 299 -> N x 768 x 17 x 17.
    for name in (
        "Conv2d_1a_3x3", "Conv2d_2a_3x3", "Conv2d_2b_3x3", "maxpool1",
        "Conv2d_3b_1x1", "Conv2d_4a_3x3", "maxpool2",
        "Mixed_5b", "Mixed_5c", "Mixed_5d",
        "Mixed_6a", "Mixed_6b", "Mixed_6c", "Mixed_6d", "Mixed_6e",
    ):
        x = getattr(self, name)(x)
    # Auxiliary classifier branches off the 17x17 map, training only.
    aux_defined = self.training and self.aux_logits
    aux = self.AuxLogits(x) if aux_defined else None
    # Tail: N x 768 x 17 x 17 -> N x 2048 x 8 x 8 -> N x 2048 x 1 x 1.
    for name in ("Mixed_7a", "Mixed_7b", "Mixed_7c", "avgpool", "dropout"):
        x = getattr(self, name)(x)
    x = F.flatten(x, 1)          # N x 2048
    x = self.fc(x)               # N x num_classes
    if self.training and self.aux_logits:
        return x, aux
    return x
def forward(self, x):
    """conv -> bn -> relu -> avgpool -> 22x22 avg-pool -> flatten -> fc."""
    out = self.conv1(x)
    out = self.bn1(out)
    out = F.relu(out)
    out = self.avgpool(out)
    out = F.avg_pool2d(out, 22)
    out = F.flatten(out, 1)
    return self.fc(out)
def forward(self, x):
    """Classify from the "res5" feature map of a reshaped input.

    The input is reshaped to NCHW with 3 x 224 x 224 per sample. Using -1
    for the batch dimension generalizes the original hard-coded batch size
    of 1 (which crashed on any multi-sample input) while remaining
    backward-compatible for single-sample calls.
    """
    x = F.reshape(x, (-1, 3, 224, 224))
    x = self.extract_features(x)["res5"]
    x = F.avg_pool2d(x, 7)
    x = F.flatten(x, 1)
    x = self.fc(x)
    return x
def forward(self, x):
    """Classify from the "res5" feature map: 7x7 avg-pool, flatten, fc.

    Removed a leftover debug ``print(x.shape)`` between the pooling and the
    flatten — it spammed stdout on every forward pass and had no effect on
    the result.
    """
    x = self.extract_features(x)["res5"]
    x = F.avg_pool2d(x, 7)
    x = F.flatten(x, 1)
    x = self.fc(x)
    return x
def forward(self, x):
    """LeNet-style forward: two conv/pool stages, flatten, three fc layers."""
    out = self.pool1(self.relu1(self.conv1(x)))
    out = self.pool2(self.relu2(self.conv2(out)))
    # F.flatten collapses the (N, C, H, W) tensor from axis 1 onward into
    # shape (N, C*H*W); equivalent to x.reshape(x.shape[0], -1).
    out = F.flatten(out, 1)
    out = self.relu3(self.fc1(out))
    out = self.relu4(self.fc2(out))
    return self.classifier(out)
def forward(self, x):
    """Quantized backbone; dequantize before the float fc head.

    FIXME: find a more elegant placement for the quant/dequant stubs
    (carried over from the original).
    """
    out = self.quant(x)
    out = self.extract_features(out)["res5"]
    out = F.avg_pool2d(out, 7)
    out = F.flatten(out, 1)
    out = self.dequant(out)
    return self.fc(out)
def forward(self, x):
    """Pipeline-parallel forward split across 4 GPUs by rank.

    Ranks > 0 first receive the activation from the previous GPU; every
    rank except the last (rank 3) forwards its output to the next GPU,
    and only the last rank runs the pooling + classifier head.

    NOTE(review): non-final ranks return the un-pooled feature tensor from
    ``self.features`` — presumably ignored by the caller; confirm.
    """
    if dist.get_rank() > 0:
        x = recv_fr_prev_gpu()
    x = self.features(x)
    if dist.get_rank() != 3:
        _ = send_to_next_gpu(x)
    else:
        x = F.avg_pool2d(x, 7)
        x = F.flatten(x, 1)
        x = self.classifier(x)
    return x
def forward(self, x):
    """Quantized conv stack; dequantize just before the float classifier."""
    out = self.quant(x)
    out = self.first_conv(out)
    out = self.maxpool(out)
    out = self.features(out)
    out = F.avg_pool2d(out, 7)
    out = F.flatten(out, 1)
    out = self.dequant(out)
    return self.classifier(out)
def forward(self, fpn_fms, rois, gtboxes=None, im_info = None):
    """Multi-head RCNN box head with optional refinement module.

    Training returns ``(loss_dict, pred_bboxes)``; inference returns the
    decoded prediction boxes.
    """
    # drop the coarsest FPN level and reorder to match stride [4,8,16,32]
    rpn_fms = fpn_fms[1:]
    rpn_fms.reverse()
    rcnn_rois = rois
    stride = [4, 8, 16, 32]
    if self.training:
        # match ROIs to ground truth (top_k proposals per head)
        rcnn_rois, labels, bbox_targets = fpn_roi_target(rois, im_info,
            gtboxes, self.iou_thresh, top_k=self.nheads)
        pool5, rcnn_rois, labels, bbox_targets = roi_pooler(
            rpn_fms, rcnn_rois, stride, (7, 7), 'roi_align', \
            labels, bbox_targets)
    else:
        pool5, rcnn_rois, _, _ = roi_pooler(rpn_fms, rcnn_rois, stride,
            (7, 7), 'roi_align')
    pool5 = F.flatten(pool5, start_axis=1)
    fc1 = self.relu(self.fc1(pool5))
    fc2 = self.relu(self.fc2(fc1))
    prob = self.p(fc2)
    if self.refinement:
        final_pred = self.refinement_module(prob, fc2)
    loss = {}
    if self.training:
        # compute the loss function and then return
        bbox_targets = bbox_targets.reshape(-1, 4) if self.nheads > 1 else bbox_targets
        labels = labels.reshape(-1)
        # single-head uses the regular loss; multi-head uses the EMD loss
        loss = self.compute_regular_loss(prob, bbox_targets, labels) if self.nheads < 2 else \
            self.compute_gemini_loss_opr(prob, bbox_targets, labels)
        pred_bboxes = self.recover_pred_boxes(rcnn_rois, prob, self.nheads)
        if self.refinement:
            auxi_loss = self.compute_gemini_loss_opr(final_pred, bbox_targets, labels)
            # NOTE(review): `pred_boxes` (refined) is computed here but the
            # function returns the unrefined `pred_bboxes` below — the
            # refined boxes are discarded during training. Confirm whether
            # this is intentional or a naming slip.
            pred_boxes = self.recover_pred_boxes(rcnn_rois, final_pred, self.nheads)
            loss.update(auxi_loss)
        return loss, pred_bboxes
    else:
        # return the detection boxes and their scores
        pred_boxes = self.recover_pred_boxes(rcnn_rois, prob, self.nheads)
        if self.refinement:
            pred_boxes = self.recover_pred_boxes(rcnn_rois, final_pred, self.nheads)
        return pred_boxes
def forward(self, x):
    """Two conv-bn-relu-pool stages followed by a two-layer MLP head."""
    out = self.pool0(self.relu0(self.bn0(self.conv0(x))))
    out = self.pool1(self.relu1(self.bn1(self.conv1(out))))
    out = F.flatten(out, 1)
    out = self.relu2(self.fc0(out))
    return self.fc1(out)
def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
    """RCNN box head: pooled ROI features -> two fc -> cls + delta.

    Training returns ``{'loss_rcnn_cls', 'loss_rcnn_loc'}``; inference
    returns ``(pred_bbox, pred_scores)`` with the background class removed.
    """
    rcnn_rois, labels, bbox_targets = self.get_ground_truth(
        rcnn_rois, im_info, gt_boxes)
    fpn_fms = [fpn_fms[x] for x in self.in_features]
    pool_features = layers.roi_pool(
        fpn_fms, rcnn_rois, self.stride, self.pooling_size,
        self.pooling_method,
    )
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_cls = self.pred_cls(roi_feature)
    pred_delta = self.pred_delta(roi_feature)
    if self.training:
        # loss for classification
        loss_rcnn_cls = layers.softmax_loss(pred_cls, labels)
        # loss for regression: pick the delta row of each ROI's own label
        pred_delta = pred_delta.reshape(-1, self.cfg.num_classes + 1, 4)
        vlabels = labels.reshape(-1, 1).broadcast((labels.shapeof(0), 4))
        pred_delta = F.indexing_one_hot(pred_delta, vlabels, axis=1)
        loss_rcnn_loc = layers.get_smooth_l1_loss(
            pred_delta, bbox_targets, labels,
            self.cfg.rcnn_smooth_l1_beta,
            norm_type="all",
        )
        loss_dict = {
            'loss_rcnn_cls': loss_rcnn_cls,
            'loss_rcnn_loc': loss_rcnn_loc
        }
        return loss_dict
    else:
        # slice 1 for removing background
        pred_scores = F.softmax(pred_cls, axis=1)[:, 1:]
        pred_delta = pred_delta[:, 4:].reshape(-1, 4)
        target_shape = (rcnn_rois.shapeof(0), self.cfg.num_classes, 4)
        # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
        base_rois = F.add_axis(rcnn_rois[:, 1:5], 1).broadcast(
            target_shape).reshape(-1, 4)
        pred_bbox = self.box_coder.decode(base_rois, pred_delta)
        return pred_bbox, pred_scores
def run_conv_bias(inp, w, b, format="NCHW"):
    """Run fused conv-bias-activation; uses a zero bias when has_bias is off.

    For the NCHW4 layout, input and weight are converted and the bias is
    flattened to the shape conv_bias_activation expects.
    """
    bias = b if has_bias else np.zeros_like(b)
    if format == "NCHW4":
        inp = convert_to_nchw4(inp)
        w = convert_to_nchw4(w)
        bias = F.flatten(bias)
    return F.conv_bias_activation(
        inp,
        w,
        bias,
        stride=(SH, SW),
        padding=(PH, PW),
        dtype=out_dtype,
        nonlinear_mode=nonlinear_mode,
    )
def forward(self, x):
    """Inception auxiliary head: pool, two convs, pool, flatten, fc.

    Input N x 768 x 17 x 17 -> logits N x 1000.
    """
    out = self.avgpool1(x)    # N x 768 x 5 x 5
    out = self.conv0(out)     # N x 128 x 5 x 5
    out = self.conv1(out)     # N x 768 x 1 x 1
    out = self.avgpool(out)   # N x 768 x 1 x 1
    out = F.flatten(out, 1)   # N x 768
    return self.fc(out)       # N x 1000
def _forward_impl(self, x):
    """Standard ResNet forward: stem, four stages, global pool, fc."""
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.maxpool(out)
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
        out = stage(out)
    out = self.avgpool(out)
    out = F.flatten(out, 1)
    return self.fc(out)
def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
    """RCNN box head (modern-API variant of the head above).

    Training returns ``{'loss_rcnn_cls', 'loss_rcnn_bbox'}``; inference
    returns ``(pred_bbox, pred_scores)`` with the background class removed.
    """
    rcnn_rois, labels, bbox_targets = self.get_ground_truth(
        rcnn_rois, im_info, gt_boxes)
    fpn_fms = [fpn_fms[x] for x in self.in_features]
    pool_features = layers.roi_pool(
        fpn_fms, rcnn_rois, self.stride, self.pooling_size,
        self.pooling_method,
    )
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_logits = self.pred_cls(roi_feature)
    pred_offsets = self.pred_delta(roi_feature)
    if self.training:
        # loss for rcnn classification
        loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)
        # loss for rcnn regression: only foreground ROIs (label > 0)
        # contribute; label-1 indexes the class-specific delta row
        pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
        num_samples = labels.shape[0]
        fg_mask = labels > 0
        # normalize by total sample count (clamped to >= 1 for safety)
        loss_rcnn_bbox = layers.smooth_l1_loss(
            pred_offsets[fg_mask, labels[fg_mask] - 1],
            bbox_targets[fg_mask],
            self.cfg.rcnn_smooth_l1_beta,
        ).sum() / F.maximum(num_samples, 1)
        loss_dict = {
            "loss_rcnn_cls": loss_rcnn_cls,
            "loss_rcnn_bbox": loss_rcnn_bbox,
        }
        return loss_dict
    else:
        # slice 1 for removing background
        pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
        pred_offsets = pred_offsets.reshape(-1, 4)
        target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
        # rois (N, 4) -> (N, 1, 4) -> (N, 80, 4) -> (N * 80, 4)
        base_rois = F.broadcast_to(
            F.expand_dims(rcnn_rois[:, 1:5], axis=1),
            target_shape).reshape(-1, 4)
        pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
        return pred_bbox, pred_scores
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """RCNN head over ROI-pooled features (legacy MegEngine API).

    Training returns ``{'loss_rcnn_cls', 'loss_rcnn_loc'}``; inference
    returns per-class boxes with foreground scores appended, shape
    (N * (num_classes - 1), 5).
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    pool_features, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_cls = self.pred_cls(roi_feature)
    pred_delta = self.pred_delta(roi_feature)
    if self.training:
        # loss for regression
        labels = labels.astype(np.int32).reshape(-1)
        # multiple classes to one: pick the delta row of each ROI's label
        # (background rows are masked out below via pos_masks)
        pos_masks = labels > 0
        pred_delta = pred_delta.reshape(-1, config.num_classes, 4)
        indexing_label = (labels * pos_masks).reshape(-1, 1)
        indexing_label = indexing_label.broadcast((labels.shapeof()[0], 4))
        pred_delta = F.indexing_one_hot(pred_delta, indexing_label, 1)
        localization_loss = smooth_l1_loss(pred_delta, bbox_targets,
                                           config.rcnn_smooth_l1_beta)
        localization_loss = localization_loss * pos_masks
        # loss for classification: rows labelled -1 are ignored
        valid_masks = labels >= 0
        objectness_loss = softmax_loss(pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks
        # normalize both losses by the number of valid ROIs
        normalizer = 1.0 / (valid_masks.sum())
        loss_rcnn_cls = objectness_loss.sum() * normalizer
        loss_rcnn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rcnn_cls'] = loss_rcnn_cls
        loss_dict['loss_rcnn_loc'] = loss_rcnn_loc
        return loss_dict
    else:
        # drop the background column; one score per foreground class
        pred_scores = F.softmax(pred_cls)[:, 1:].reshape(-1, 1)
        pred_delta = pred_delta[:, 4:].reshape(-1, 4)
        target_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
        # rois (N, 4) -> (N, 1, 4) -> (N, C-1, 4) -> (N*(C-1), 4)
        base_rois = F.add_axis(rcnn_rois[:, 1:5], 1).broadcast(
            target_shape).reshape(-1, 4)
        pred_bbox = restore_bbox(base_rois, pred_delta, True)
        pred_bbox = F.concat([pred_bbox, pred_scores], axis=1)
        return pred_bbox
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """Twin-prediction (EMD) RCNN head for crowd detection.

    Two parallel cls/delta heads produce two candidate sets per ROI; at
    train time the permutation with the smaller EMD loss is chosen per ROI.
    Training returns ``{'loss_rcnn_emd'}``; inference returns interleaved
    boxes+scores from both heads, shape (N * 2 * (num_classes - 1), 5).
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    pool_features, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_emd_pred_cls_0 = self.emd_pred_cls_0(roi_feature)
    pred_emd_pred_delta_0 = self.emd_pred_delta_0(roi_feature)
    pred_emd_pred_cls_1 = self.emd_pred_cls_1(roi_feature)
    pred_emd_pred_delta_1 = self.emd_pred_delta_1(roi_feature)
    if self.training:
        # EMD loss under both assignment orders (0->1 and 1->0)
        loss0 = emd_loss(
            pred_emd_pred_delta_0, pred_emd_pred_cls_0,
            pred_emd_pred_delta_1, pred_emd_pred_cls_1,
            bbox_targets, labels)
        loss1 = emd_loss(
            pred_emd_pred_delta_1, pred_emd_pred_cls_1,
            pred_emd_pred_delta_0, pred_emd_pred_cls_0,
            bbox_targets, labels)
        # per-ROI, keep whichever ordering yields the smaller loss
        loss = F.concat([loss0, loss1], axis=1)
        indices = F.argmin(loss, axis=1)
        loss_emd = F.indexing_one_hot(loss, indices, 1)
        loss_emd = loss_emd.sum()/loss_emd.shapeof()[0]
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = loss_emd
        return loss_dict
    else:
        # drop background columns; decode both heads' boxes
        pred_scores_0 = F.softmax(pred_emd_pred_cls_0)[:, 1:].reshape(-1, 1)
        pred_scores_1 = F.softmax(pred_emd_pred_cls_1)[:, 1:].reshape(-1, 1)
        pred_delta_0 = pred_emd_pred_delta_0[:, 4:].reshape(-1, 4)
        pred_delta_1 = pred_emd_pred_delta_1[:, 4:].reshape(-1, 4)
        target_shape = (rcnn_rois.shapeof()[0], config.num_classes - 1, 4)
        base_rois = F.add_axis(rcnn_rois[:, 1:5], 1).broadcast(
            target_shape).reshape(-1, 4)
        pred_bbox_0 = restore_bbox(base_rois, pred_delta_0, True)
        pred_bbox_1 = restore_bbox(base_rois, pred_delta_1, True)
        pred_bbox_0 = F.concat([pred_bbox_0, pred_scores_0], axis=1)
        pred_bbox_1 = F.concat([pred_bbox_1, pred_scores_1], axis=1)
        # [{head0, pre1, tag1}, {head1, pre1, tag1}, {head0, pre1, tag2}, ...]
        pred_bbox = F.concat((pred_bbox_0, pred_bbox_1), axis=1).reshape(-1,5)
        return pred_bbox
def forward(self, fpn_fms, proposals, labels=None, bbox_targets=None):
    """One cascade RCNN stage: refine proposals and score them.

    Training returns ``(pred_proposals, loss_dict)`` where the refined
    proposals are detached (``F.zero_grad``) for the next stage; inference
    returns ``(pred_proposals, pred_scores)``.
    """
    # input p2-p5
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    #pool_features = roi_pooler(fpn_fms, proposals, stride, (7, 7), "ROIAlignV2")
    pool_features, proposals, labels, bbox_targets = roi_pool(
        fpn_fms, proposals, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flatten_feature = F.flatten(pool_features, start_axis=1)
    roi_feature = F.relu(self.fc1(flatten_feature))
    roi_feature = F.relu(self.fc2(roi_feature))
    pred_cls = self.pred_cls(roi_feature)
    pred_delta = self.pred_delta(roi_feature)
    if self.training:
        # loss for regression
        labels = labels.astype(np.int32).reshape(-1)
        # multiple classes to one: foreground mask gates the box loss
        pos_masks = labels > 0
        localization_loss = smooth_l1_loss(
            pred_delta, bbox_targets, config.rcnn_smooth_l1_beta)
        localization_loss = localization_loss * pos_masks
        # loss for classification: rows labelled -1 are ignored
        valid_masks = labels >= 0
        objectness_loss = softmax_loss(
            pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks
        normalizer = 1.0 / (valid_masks.sum())
        loss_rcnn_cls = objectness_loss.sum() * normalizer
        loss_rcnn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        # keys are namespaced by stage so cascade stages don't collide
        loss_dict[self.stage_name + '_cls'] = loss_rcnn_cls
        loss_dict[self.stage_name + '_loc'] = loss_rcnn_loc
        # decode refined boxes; detach so the next stage doesn't
        # back-propagate through this stage's regression
        pred_bbox = restore_bbox(proposals[:, 1:5], pred_delta, True)
        pred_proposals = F.zero_grad(F.concat(
            [proposals[:, 0].reshape(-1, 1), pred_bbox], axis=1))
        return pred_proposals, loss_dict
    else:
        # binary-style score: probability of the single foreground class
        pred_scores = F.softmax(pred_cls)[:, 1].reshape(-1, 1)
        pred_bbox = restore_bbox(proposals[:, 1:5], pred_delta, True)
        pred_proposals = F.concat(
            [proposals[:, 0].reshape(-1, 1), pred_bbox], axis=1)
        return pred_proposals, pred_scores
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """RCNN head: ROI-aligned features -> two fc -> cls scores + boxes.

    Training returns ``{'cls_loss', 'bbox_loss'}``; inference returns
    per-ROI decoded boxes with class probabilities on the last axis.
    """
    # FPN maps arrive ordered for strides 64,32,16,8,4; drop the coarsest
    # level and reverse so they line up with stride = [4, 8, 16, 32].
    levels = fpn_fms[1:]
    levels.reverse()
    stride = [4, 8, 16, 32]
    pooled, rcnn_rois, labels, bbox_targets = roi_pool(
        levels, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    flat = F.flatten(pooled, start_axis=1)
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    cls_scores = self.cls(hidden)
    box_preds = self.bbox(hidden)
    # concatenated form used by the inference path below
    prob = F.concat([box_preds, cls_scores], axis=1)
    if self.training:
        bbox_targets, labels = bbox_targets.reshape(-1, 4), labels.flatten()
        cls_loss = softmax_loss(cls_scores, labels)
        box_preds = box_preds.reshape(-1, self.n, 4)
        bbox_loss = smooth_l1_loss_rcnn(box_preds, bbox_targets, labels,
                                        config.rcnn_smooth_l1_beta)
        return {'cls_loss': cls_loss, 'bbox_loss': bbox_loss}
    else:
        offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        pred_bbox = offsets.reshape(-1, self.n, 4)
        cls_prob = F.softmax(cls_scores, axis=1)
        n = rcnn_rois.shape[0]
        rois = F.broadcast_to(
            F.expand_dims(rcnn_rois[:, 1:5], axis=1), (n, 1, 4)
        ).reshape(-1, 4)
        normalized = config.rcnn_bbox_normalize_targets
        decoded = restore_bbox(rois, pred_bbox, normalized, config)
        return F.concat([decoded, F.expand_dims(cls_prob, axis=2)], axis=2)
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """EMD RCNN head with optional refinement module.

    Training returns a dict of EMD losses; inference returns decoded boxes
    with class probabilities appended on the last axis.
    """
    # stride: 64,32,16,8,4 -> 4, 8, 16, 32
    fpn_fms = fpn_fms[1:]
    fpn_fms.reverse()
    stride = [4, 8, 16, 32]
    poo5, rcnn_rois, labels, bbox_targets = roi_pool(
        fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align',
        labels, bbox_targets)
    poo5 = F.flatten(poo5, start_axis=1)
    fc1 = F.relu(self.fc1(poo5))
    fc2 = F.relu(self.fc2(fc1))
    a = self.a(fc2)
    b = self.b(fc2)
    # interleave the two prediction heads row-wise (rows alternate a/b)
    prob = F.stack([a, b], axis=1).reshape(-1, a.shape[1])
    if self.refinement:
        final_prob = self.refinement_module(prob, fc2)
    if self.training:
        emd_loss = self.compute_gemini_loss(prob, bbox_targets, labels)
        loss_dict = {}
        loss_dict['loss_rcnn_emd'] = emd_loss
        # BUG FIX: this guard previously tested `self.refinement_module`
        # (the module object) instead of the `self.refinement` flag used
        # above; with the flag off but the module present, `final_prob`
        # was unbound and this raised NameError. Test the same flag.
        if self.refinement:
            final_emd_loss = self.compute_gemini_loss(
                final_prob, bbox_targets, labels)
            loss_dict['final_rcnn_emd'] = final_emd_loss
        return loss_dict
    else:
        offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        pred_bbox = offsets.reshape(-1, self.n, 4)
        cls_prob = F.softmax(cls_scores, axis=1)
        n = rcnn_rois.shape[0]
        # each ROI is duplicated for the two heads -> (n, 2, 4)
        rois = F.broadcast_to(F.expand_dims(rcnn_rois[:, 1:5], axis=1),
                              (n, 2, 4)).reshape(-1, 4)
        normalized = config.rcnn_bbox_normalize_targets
        pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
        pred_bbox = F.concat(
            [pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)
        return pred_bbox