def test_bbox_coders():
    coder_config = {'type': constants.KEY_ORIENTS}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])

    label_boxes_3d = torch.stack(3 * [label_boxes_3d[:num_instances]], dim=0)
    proposals = torch.stack(3 * [proposals[:num_instances]], dim=0)
    p2 = torch.stack(3 * [p2], dim=0)

    orients = bbox_coder.encode_batch(label_boxes_3d, proposals, p2)
    print(orients.shape)

    # one-hot encode the orientation class column before decoding
    encoded_cls_orients = torch.zeros_like(orients[:, :, :2])
    cls_orients = orients[:, :, :1].long()
    row = torch.arange(0, cls_orients.numel()).type_as(cls_orients)
    encoded_cls_orients.view(-1, 2)[row, cls_orients.view(-1)] = 1
    encoded_orients = torch.cat([encoded_cls_orients, orients[:, :, 1:]], dim=-1)

    ry = bbox_coder.decode_batch(encoded_orients, proposals, proposals, p2)
    print(ry)
    print(label_boxes_3d[:, :, -1])
    print(sample[constants.KEY_IMAGE_PATH])
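# The one-hot trick used above, shown in isolation as a minimal sketch with
# plain PyTorch (no repo dependencies); the tensor values are illustrative only.
import torch

cls = torch.tensor([[0], [1], [1]])          # (num_boxes, 1) integer orientation bins
one_hot = torch.zeros(cls.shape[0], 2)
one_hot[torch.arange(cls.shape[0]), cls.view(-1)] = 1
# one_hot -> [[1, 0], [0, 1], [0, 1]], the two-channel layout the decoder expects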
def __init__(self, assigner_config):
    # compose the similarity calculator, bbox coder and matcher from config
    self.similarity_calc = similarity_calcs.build(
        assigner_config['similarity_calc_config'])
    self.bbox_coder = bbox_coders.build(assigner_config['coder_config'])
    self.matcher = matchers.build(assigner_config['matcher_config'])
    self.fg_thresh = assigner_config['fg_thresh']
    self.bg_thresh = assigner_config['bg_thresh']
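# Sketch of a config dict this constructor could consume. The 'type' values are
# assumptions for illustration (only 'center' and constants.KEY_BOXES_2D appear
# elsewhere in these snippets; the real registered names depend on
# similarity_calcs, bbox_coders and matchers), and the thresholds are examples.
example_assigner_config = {
    'similarity_calc_config': {'type': 'center'},
    'coder_config': {'type': constants.KEY_BOXES_2D},
    'matcher_config': {'type': 'argmax'},  # assumed matcher name
    'fg_thresh': 0.5,
    'bg_thresh': 0.4,
}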
def test_corners_3d_coder():
    coder_config = {'type': constants.KEY_CORNERS_3D}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])

    label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
    label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
    proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
    p2 = torch.stack(1 * [p2], dim=0)

    encoded_corners_3d = bbox_coder.encode_batch(label_boxes_3d, label_boxes_2d, p2)
    num_boxes = encoded_corners_3d.shape[1]
    batch_size = encoded_corners_3d.shape[0]
    decoded_corners_3d = bbox_coder.decode_batch(
        encoded_corners_3d.view(batch_size, num_boxes, -1), proposals, p2)
    decoded_corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        decoded_corners_3d[0].view(-1, 3), p2[0]).view(-1, 8, 2)
    decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

    image_path = sample[constants.KEY_IMAGE_PATH]
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(
        image_dir,
        result_dir,
        label_dir=label_dir,
        calib_dir=calib_dir,
        calib_file=calib_file,
        online=False,
        save_dir=save_dir)
    visualizer.render_image_corners_2d(image_path, decoded_corners_2d)
def test_orient_coder():
    coder_config = {'type': constants.KEY_ORIENTS}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])

    orients = bbox_coder.encode_batch(label_boxes_3d, proposals, p2)
    print(orients.shape)
def test_orientv3_coder():
    coder_config = {'type': constants.KEY_ORIENTS_V3}
    orient_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
    label_boxes_3d = torch.stack(3 * [label_boxes_3d[:num_instances]], dim=0)

    orients = orient_coder.encode_batch(label_boxes_3d)
    print(orients)
def assign_target(cls, **kwargs):
    match = kwargs[constants.KEY_MATCH]
    gt = kwargs[constants.KEY_BOXES_3D]
    assigned_gt = cls.generate_assigned_label(cls, kwargs[constants.KEY_MATCH], gt)

    coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V3})
    reg_targets_batch = coder.encode_batch(assigned_gt)

    reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def assign_target(cls, **kwargs):
    match = kwargs[constants.KEY_MATCH]
    gt = kwargs[constants.KEY_BOXES_3D]
    assigned_gt = cls.generate_assigned_label(cls, kwargs[constants.KEY_MATCH], gt)
    proposals = kwargs[constants.KEY_PROPOSALS]
    p2 = kwargs[constants.KEY_STEREO_CALIB_P2]

    coder = bbox_coders.build({'type': constants.KEY_REAR_SIDE})
    reg_targets_batch = coder.encode_batch(assigned_gt, proposals, p2)

    reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def assign_target(cls, **kwargs):
    label_boxes_3d = kwargs[constants.KEY_BOXES_3D]
    p2 = kwargs[constants.KEY_STEREO_CALIB_P2]

    # prepare coder
    coder = bbox_coders.build({'type': constants.KEY_CORNERS_3D_GRNET})
    reg_targets_batch = coder.encode_batch(label_boxes_3d, p2)
    reg_targets_batch = cls.generate_assigned_label(
        cls, kwargs[constants.KEY_MATCH], reg_targets_batch)

    # reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def assign_target(cls, **kwargs):
    match = kwargs[constants.KEY_MATCH]
    gt = kwargs[constants.KEY_BOXES_2D]
    proposals = kwargs[constants.KEY_PROPOSALS]
    assigned_gt = cls.generate_assigned_label(cls, kwargs[constants.KEY_MATCH], gt)

    # prepare the 2d box coder from config
    bbox_coder_config = kwargs[constants.KEY_TARGET_GENERATOR_CONFIG]['coder_config']
    coder = bbox_coders.build(bbox_coder_config)
    reg_targets_batch = coder.encode_batch(proposals, assigned_gt)

    reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def assign_target(cls, **kwargs):
    match = kwargs[constants.KEY_MATCH]
    proposals = kwargs[constants.KEY_PROPOSALS]
    label_boxes_3d = kwargs[constants.KEY_BOXES_3D]
    p2 = kwargs[constants.KEY_STEREO_CALIB_P2]

    # prepare coder
    coder = bbox_coders.build({'type': constants.KEY_CORNERS_3D})
    label_boxes_3d = cls.generate_assigned_label(
        cls, kwargs[constants.KEY_MATCH], label_boxes_3d)
    reg_targets_batch = coder.encode_batch(label_boxes_3d, proposals, p2)

    reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def assign_target(cls, **kwargs):
    match = kwargs[constants.KEY_MATCH]
    keypoints = kwargs[constants.KEY_KEYPOINTS]

    # prepare coder
    coder = bbox_coders.build({'type': constants.KEY_KEYPOINTS_HEATMAP})
    proposals = kwargs[constants.KEY_PROPOSALS]

    # assign label keypoints first
    assigned_keypoints = cls.generate_assigned_label(
        cls, kwargs[constants.KEY_MATCH], keypoints)
    reg_targets_batch = coder.encode_batch(proposals, assigned_keypoints)

    reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def assign_target(cls, **kwargs):
    match = kwargs[constants.KEY_MATCH]
    gt = kwargs[constants.KEY_BOXES_3D][:, :, 3:6]
    label_classes = kwargs[constants.KEY_CLASSES]
    mean_dims = kwargs[constants.KEY_MEAN_DIMS]

    # prepend a background entry so class indices line up with the mean dims
    bg_dim = torch.zeros_like(mean_dims[:, -1:, :])
    mean_dims = torch.cat([bg_dim, mean_dims], dim=1)
    mean_dims = cls.generate_assigned_label(cls, label_classes.long(), mean_dims)

    # prepare coder
    coder = bbox_coders.build({'type': constants.KEY_DIMS})
    reg_targets_batch = coder.encode_batch(gt, mean_dims)
    reg_targets_batch = cls.generate_assigned_label(
        cls, kwargs[constants.KEY_MATCH], reg_targets_batch)

    reg_targets_batch[match == -1] = 0  # no need grad_fn
    return reg_targets_batch
def encode_orig(self, boxes, classes, threshold=0.5):
    default_boxes = self.default_boxes
    # convert (cx, cy, w, h) default boxes to (xmin, ymin, xmax, ymax)
    default_boxes = torch.cat([
        default_boxes[:, :2] - default_boxes[:, 2:] / 2,
        default_boxes[:, :2] + default_boxes[:, 2:] / 2
    ], 1)

    # iou: [#obj, 8732]
    similarity_calc = similarity_calcs.build({'type': 'center'})
    iou = similarity_calc.compare_batch(
        boxes.unsqueeze(0), default_boxes.unsqueeze(0)).squeeze(0)
    max_iou, max_anchor = iou.max(1)
    iou, max_idx = iou.max(0)
    max_idx.squeeze_(0)  # [8732,]
    iou.squeeze_(0)  # [8732,]

    boxes = boxes[max_idx]  # [8732,4]
    coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
    loc = coder.encode_batch(default_boxes.unsqueeze(0),
                             boxes.unsqueeze(0)).squeeze(0)

    neg = (iou < 0.4)
    ignore = (iou < threshold)
    os = torch.ones(iou.size()).long()
    os[ignore] = -1
    os[neg] = 0

    neg = (iou < 0.3)
    neg[max_anchor] = 0
    ignore[max_anchor] = 0

    conf = classes[max_idx]  # [8732,], background class = 0
    conf[ignore] = -1  # ignore iou in [0.4, threshold)
    conf[neg] = 0  # background
    return loc, conf, os, max_idx
def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds, im_info):
    # TODO create a new Function
    """
    Args:
        rpn_cls_probs: FloatTensor, shape(N, 2*num_anchors, H, W)
        rpn_bbox_preds: FloatTensor, shape(N, num_anchors*4, H, W)
        anchors: FloatTensor, shape(N, 4, H, W)
    Returns:
        proposals_batch: FloatTensor, shape(N, post_nms_topN, 4)
        fg_probs_batch: FloatTensor, shape(N, post_nms_topN)
    """
    # do not backprop through the RPN outputs
    rpn_cls_probs = rpn_cls_probs.detach()
    rpn_bbox_preds = rpn_bbox_preds.detach()

    batch_size = rpn_bbox_preds.shape[0]
    coders = bbox_coders.build(
        self.target_generators.target_generator_config['coder_config'])
    proposals = coders.decode_batch(rpn_bbox_preds, anchors)

    # filter and clip
    proposals = box_ops.clip_boxes(proposals, im_info)

    # fg prob
    fg_probs = rpn_cls_probs[:, :, 1]

    # sort fg
    _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)
    proposals_order = torch.zeros(
        batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)
    return proposals, proposals_order
def test_mobileye_coder():
    coder_config = {'type': constants.KEY_MOBILEYE}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset('kitti')
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(
        image_dir,
        result_dir,
        label_dir=label_dir,
        calib_dir=calib_dir,
        calib_file=calib_file,
        online=False,
        save_dir=save_dir)

    for sample in dataset:
        label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
        label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
        p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
        proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
        num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
        image_info = torch.from_numpy(sample[constants.KEY_IMAGE_INFO])

        label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
        label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
        proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
        image_info = torch.stack(1 * [image_info], dim=0)
        p2 = torch.stack(1 * [p2], dim=0)

        encoded_corners_2d = bbox_coder.encode_batch(
            label_boxes_3d, label_boxes_2d, p2, image_info, label_boxes_2d)
        decoded_corners_2d = bbox_coder.decode_batch(encoded_corners_2d, proposals)
        decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

        image_path = sample[constants.KEY_IMAGE_PATH]
        visualizer.render_image_corners_2d(
            image_path, corners_2d=decoded_corners_2d[0], p2=p2[0])
def forward(self, feed_dict):
    prediction_dict = {}

    # base model
    base_feat = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'base_feat': base_feat})
    self.add_feat('base_feat', base_feat)

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']
    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals

            proposals_dict, loss_units, stats = self.target_generators[i].generate_targets(
                proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1, self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1, self.n_classes)

        if not self.class_agnostic:
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, rcnn_cls_probs.argmax(dim=-1).view(-1))
        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            multi_stage_loss_units.extend([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D]
            ])
            multi_stage_stats.append(stats)

        # decode for next stage
        coder = bbox_coders.build(
            self.target_generators[i].target_generator_config['coder_config'])
        proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
        prediction_dict[constants.KEY_BOXES_2D] = proposals

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def test_keypoint_hm_coder():
    coder_config = {'type': constants.KEY_KEYPOINTS_HEATMAP}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset(dataset_type='keypoint_kitti')
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
    keypoints = sample[constants.KEY_KEYPOINTS]

    label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
    label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
    proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
    keypoints = torch.stack(1 * [keypoints[:num_instances]], dim=0)
    p2 = torch.stack(1 * [p2], dim=0)

    encoded_corners_3d = bbox_coder.encode_batch(proposals, keypoints)
    num_boxes = encoded_corners_3d.shape[1]
    batch_size = encoded_corners_3d.shape[0]
    keypoint_heatmap = encoded_corners_3d.view(batch_size, num_boxes, 8, -1)[..., :-1]

    # reshape before decode
    keypoint_heatmap = keypoint_heatmap.contiguous().view(batch_size, num_boxes, -1)
    decoded_corners_2d = bbox_coder.decode_batch(proposals, keypoint_heatmap)
    decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

    image_path = sample[constants.KEY_IMAGE_PATH]
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(
        image_dir,
        result_dir,
        label_dir=label_dir,
        calib_dir=calib_dir,
        calib_file=calib_file,
        online=False,
        save_dir=save_dir)
    visualizer.render_image_corners_2d(image_path, corners_2d=decoded_corners_2d[0])
def __init__(self, config):
    self.coder = bbox_coders.build(config['coder_config'])
def forward(self, feed_dict):
    features = self.feature_extractor(feed_dict[constants.KEY_IMAGE])
    y_locs1 = []
    y_locs2 = []
    y_os = []
    y_cls = []
    for i, x in enumerate(features):
        # location out
        loc_feature = self.loc_feature1(x)
        loc1 = self.box_out1(loc_feature)
        N = loc1.size(0)
        loc1 = loc1.permute(0, 2, 3, 1).contiguous()
        loc1 = loc1.view(N, -1, self.num_regress)
        y_locs1.append(loc1)

        loc_feature = torch.cat([x, loc_feature], dim=1)
        loc_feature = self.loc_feature2(loc_feature)
        loc2 = self.box_out2(loc_feature)
        N = loc2.size(0)
        loc2 = loc2.permute(0, 2, 3, 1).contiguous()
        loc2 = loc2.view(N, -1, self.num_regress)
        loc2 += loc1
        y_locs2.append(loc2)

        # os out
        cls_feature = self.cls_feature1(x)
        os_out = self.os_out(cls_feature)
        os_out = os_out.permute(0, 2, 3, 1).contiguous()
        os_out = os_out.view(N, -1, 2)
        y_os.append(os_out)

        cls_feature = torch.cat([x, cls_feature], dim=1)
        cls_feature = self.cls_feature2(cls_feature)
        cls_out = self.cls_out(cls_feature)
        cls_out = cls_out.permute(0, 2, 3, 1).contiguous()
        cls_out = cls_out.view(N, -1, self.num_classes)
        y_cls.append(cls_out)

    loc1_preds = torch.cat(y_locs1, dim=1)
    loc2_preds = torch.cat(y_locs2, dim=1)
    os_preds = torch.cat(y_os, dim=1)
    cls_preds = torch.cat(y_cls, dim=1)

    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    batch_size = loc1_preds.shape[0]
    anchors = self.anchors.cuda()
    anchors = anchors.repeat(batch_size, 1, 1)

    coder = bbox_coders.build(
        self.target_generators.target_generator_config['coder_config'])
    proposals = coder.decode_batch(loc2_preds, anchors).detach()

    cls_probs = F.softmax(cls_preds.detach(), dim=-1)
    os_probs = F.softmax(os_preds.detach(), dim=-1)[:, :, 1:]
    os_probs[os_probs <= 0.4] = 0
    final_probs = cls_probs * os_probs

    image_info = feed_dict[constants.KEY_IMAGE_INFO].unsqueeze(-1).unsqueeze(-1)
    prediction_dict = {}
    if self.training:
        anchors_dict = {}
        anchors_dict[constants.KEY_PRIMARY] = anchors
        anchors_dict[constants.KEY_BOXES_2D] = loc1_preds
        anchors_dict[constants.KEY_BOXES_2D_REFINE] = loc2_preds
        anchors_dict[constants.KEY_CLASSES] = cls_preds
        anchors_dict[constants.KEY_OBJECTNESS] = os_preds

        gt_dict = {}
        gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
        gt_dict[constants.KEY_CLASSES] = None
        gt_dict[constants.KEY_BOXES_2D] = None
        gt_dict[constants.KEY_OBJECTNESS] = None
        gt_dict[constants.KEY_BOXES_2D_REFINE] = None

        auxiliary_dict = {}
        label_boxes_2d = feed_dict[constants.KEY_LABEL_BOXES_2D]
        if self.normlize_anchor:
            label_boxes_2d[:, :, ::2] = label_boxes_2d[:, :, ::2] / image_info[:, 1]
            label_boxes_2d[:, :, 1::2] = label_boxes_2d[:, :, 1::2] / image_info[:, 0]
        auxiliary_dict[constants.KEY_BOXES_2D] = label_boxes_2d
        gt_labels = feed_dict[constants.KEY_LABEL_CLASSES]
        auxiliary_dict[constants.KEY_CLASSES] = gt_labels
        auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
        auxiliary_dict[constants.KEY_PROPOSALS] = anchors

        proposals_dict, targets, stats = self.target_generators.generate_targets(
            anchors_dict, gt_dict, auxiliary_dict, subsample=False)

        # recall
        anchors_dict[constants.KEY_PRIMARY] = proposals
        _, _, second_stage_stats = self.target_generators.generate_targets(
            anchors_dict, gt_dict, auxiliary_dict, subsample=False)

        # precision
        fg_probs, _ = final_probs[:, :, 1:].max(dim=-1)
        fake_match = auxiliary_dict[constants.KEY_FAKE_MATCH]
        second_stage_stats.update(
            Analyzer.analyze_precision(
                fake_match,
                fg_probs,
                feed_dict[constants.KEY_NUM_INSTANCES],
                thresh=0.3))

        prediction_dict[constants.KEY_STATS] = [stats, second_stage_stats]
        prediction_dict[constants.KEY_TARGETS] = targets
    else:
        prediction_dict[constants.KEY_CLASSES] = final_probs
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3]
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2]
        prediction_dict[constants.KEY_BOXES_2D] = proposals

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def forward(self, feed_dict):
    self.target_assigner.bbox_coder_3d.mean_dims = feed_dict[constants.KEY_MEAN_DIMS]
    prediction_dict = {}

    # base model
    base_feat = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'base_feat': base_feat})

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))

    if self.training:
        self.pre_subsample(prediction_dict, feed_dict)
    rois_batch = prediction_dict['rois_batch']

    # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
    pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

    # shape(N,C,1,1)
    second_pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
    second_pooled_feat = second_pooled_feat.mean(3).mean(2)

    rcnn_cls_scores = self.rcnn_cls_preds(second_pooled_feat)
    rcnn_bbox_preds = self.rcnn_bbox_preds(second_pooled_feat)
    rcnn_3d = self.rcnn_3d_pred(second_pooled_feat)
    rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

    prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
    prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
    prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

    # used for track
    proposals_order = prediction_dict['proposals_order']
    prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][proposals_order]

    ###################################
    # 3d training
    ###################################
    prediction_dict['rcnn_3d'] = rcnn_3d

    batch_size = feed_dict[constants.KEY_IMAGE].shape[0]
    coder = self.target_assigner.bbox_coder
    proposals = coder.decode_batch(
        rcnn_bbox_preds.view(batch_size, -1, 4), rois_batch[0, :, 1:]).detach()

    if not self.training:
        new_rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_bbox(
            rcnn_3d, rois_batch[0])

        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs.view(
            batch_size, -1, self.n_classes)

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
        prediction_dict[constants.KEY_BOXES_2D] = proposals

        dims = self.squeeze_bbox_preds(
            new_rcnn_3d[:, :-3].contiguous(),
            rcnn_cls_probs.argmax(dim=-1).view(-1),
            out_c=3).view(batch_size, -1, 3)

        rcnn_orient_preds = rcnn_3d[:, 3:].view(batch_size, -1, 5)
        coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V2})
        orients = coder.decode_batch(
            rcnn_orient_preds, proposals,
            feed_dict[constants.KEY_STEREO_CALIB_P2_ORIG]).detach()

        prediction_dict[constants.KEY_DIMS] = dims
        prediction_dict[constants.KEY_ORIENTS_V2] = orients
        prediction_dict['rcnn_3d'] = torch.cat(
            [dims, new_rcnn_3d[:, -3:].view(batch_size, -1, 3)], dim=-1)

    return prediction_dict
def forward(self, feed_dict):
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']
    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_MOBILEYE] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[i].generate_targets(
                proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps, rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1)
        pooled_feat_for_corners = self.feature_extractor.second_stage_feature(pooled_feat)

        keypoint_map = self.keypoint_predictor(pooled_feat)
        keypoint_map = keypoint_map.mean(-2)

        pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
        rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat_for_corners)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1, self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1, self.n_classes)

        if not self.class_agnostic:
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, rcnn_cls_probs.argmax(dim=-1).view(-1))

        # not class_agnostic for dims
        if not self.class_agnostic_3d:
            if self.training:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1),
                    out_c=3)
            else:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds, rcnn_cls_probs.argmax(dim=-1).view(-1), out_c=3)

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        # decode for next stage
        coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS], rcnn_cls_probs).detach()

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[constants.KEY_MOBILEYE]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_MOBILEYE],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)

        coder = bbox_coders.build({'type': constants.KEY_MOBILEYE})
        rcnn_corners_preds = coder.decode_batch(
            rcnn_corners_preds.detach(), proposals, keypoint_map.detach())
        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_KEYPOINTS_HEATMAP] = keypoint_map

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        coder = bbox_coders.build(
            self.target_generators[i].target_generator_config['coder_config'])
        proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
        rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[:, :, :, 0] / image_info[:, None, None, 3]
        rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[:, :, :, 1] / image_info[:, None, None, 2]
        prediction_dict[constants.KEY_BOXES_2D] = proposals

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds, im_info):
    # TODO create a new Function
    """
    Args:
        rpn_cls_probs: FloatTensor, shape(N, 2*num_anchors, H, W)
        rpn_bbox_preds: FloatTensor, shape(N, num_anchors*4, H, W)
        anchors: FloatTensor, shape(N, 4, H, W)
    Returns:
        proposals_batch: FloatTensor, shape(N, post_nms_topN, 4)
        fg_probs_batch: FloatTensor, shape(N, post_nms_topN)
    """
    # do not backprop through the RPN outputs
    rpn_cls_probs = rpn_cls_probs.detach()
    rpn_bbox_preds = rpn_bbox_preds.detach()

    batch_size = rpn_bbox_preds.shape[0]
    coders = bbox_coders.build(
        self.target_generators.target_generator_config['coder_config'])
    proposals = coders.decode_batch(rpn_bbox_preds, anchors)

    # filter and clip
    proposals = box_ops.clip_boxes(proposals, im_info)

    # fg prob
    fg_probs = rpn_cls_probs[:, :, 1]

    # sort fg
    _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

    proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                  4).type_as(rpn_bbox_preds)
    proposals_order = torch.zeros(
        batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

    for i in range(batch_size):
        proposals_single = proposals[i]
        fg_probs_single = fg_probs[i]
        fg_order_single = fg_probs_order[i]

        # pre nms
        if self.pre_nms_topN > 0:
            fg_order_single = fg_order_single[:self.pre_nms_topN]
        proposals_single = proposals_single[fg_order_single]
        fg_probs_single = fg_probs_single[fg_order_single]

        # nms
        keep_idx_i = nms(proposals_single, fg_probs_single, self.nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # post nms
        if self.post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:self.post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        fg_probs_single = fg_probs_single[keep_idx_i]
        fg_order_single = fg_order_single[keep_idx_i]

        # pad with zeros at the end
        num_proposal = keep_idx_i.numel()
        proposals_batch[i, :num_proposal, :] = proposals_single
        proposals_order[i, :num_proposal] = fg_order_single

    return proposals_batch, proposals_order
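# Stand-alone sketch of the same filtering order (pre-NMS top-N, NMS, post-NMS
# top-N) on toy boxes. torchvision.ops.nms is used here as the NMS
# implementation purely for illustration; the repo calls its own `nms` wrapper
# and its threshold values are configuration-dependent.
import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
order = scores.argsort(descending=True)[:2]      # pre-NMS top-N
keep = nms(boxes[order], scores[order], 0.5)     # NMS (IoU threshold 0.5)
keep = keep[:2]                                  # post-NMS top-N
print(boxes[order][keep])                        # the highly overlapping box is suppressed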
def forward(self, feed_dict):
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']
    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_CORNERS_2D] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[i].generate_targets(
                proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps, rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1)
        pooled_feat_for_corners = self.feature_extractor.second_stage_feature(pooled_feat)
        pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
        rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat_for_corners)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1, self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1, self.n_classes)
        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        rcnn_depth_preds = self.rcnn_depth_preds[i](pooled_feat_for_corners)
        rcnn_depth_preds = rcnn_depth_preds.view(batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_corners_preds = self.fuse_corners_and_depth(rcnn_corners_preds, rcnn_depth_preds)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_CORNERS_2D],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)
        else:
            center_depth = rcnn_corners_preds[:, :, -1:]
            coder = bbox_coders.build({'type': constants.KEY_CORNERS_2D_NEAREST_DEPTH})
            rcnn_corners_preds = coder.decode_batch(
                rcnn_corners_preds.detach(), proposals,
                feed_dict[constants.KEY_STEREO_CALIB_P2])

            coder = bbox_coders.build(
                self.target_generators[i].target_generator_config['coder_config'])
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

            coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS], rcnn_cls_probs).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CENTER_DEPTH] = center_depth
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
        rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[:, :, :, 0] / image_info[:, None, None, 3:4]
        rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[:, :, :, 1] / image_info[:, None, None, 2:3]

        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def _set_coders(self, name):
    import bbox_coders
    config = {'type': name}
    self._coders = bbox_coders.build(config)
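# Usage sketch (assumed, not a definitive API reference): once a coder type is
# registered under a constants key, the same build/encode_batch/decode_batch
# calls used in the tests above apply; the argument lists below mirror
# test_corners_3d_coder.
#
#     self._set_coders(constants.KEY_CORNERS_3D)
#     encoded = self._coders.encode_batch(label_boxes_3d, label_boxes_2d, p2)
#     decoded = self._coders.decode_batch(encoded, proposals, p2)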
def forward(self, feed_dict):
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']
    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_CORNERS_2D] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[i].generate_targets(
                proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps, rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1)
        pooled_feat_for_corners = self.feature_extractor.second_stage_feature(pooled_feat)
        pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
        rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat_for_corners)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1, self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1, self.n_classes)

        if not self.class_agnostic:
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, rcnn_cls_probs.argmax(dim=-1).view(-1))

        # not class_agnostic for dims
        if not self.class_agnostic_3d:
            if self.training:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1),
                    out_c=3)
            else:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds, rcnn_cls_probs.argmax(dim=-1).view(-1), out_c=3)

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_CORNERS_2D],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)

        # decode for next stage
        # corners decode
        corners_coder = bbox_coders.build({'type': constants.KEY_CORNERS_2D_STABLE})
        rcnn_corners_preds = corners_coder.decode_batch(rcnn_corners_preds.detach(), proposals)
        # bbox decode
        boxes_coder = bbox_coders.build(
            self.target_generators[i].target_generator_config['coder_config'])
        proposals = boxes_coder.decode_batch(rcnn_bbox_preds, proposals).detach()
        # dims decode
        dims_coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = dims_coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS], rcnn_cls_probs).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        image_info = image_info.unsqueeze(1).unsqueeze(1)
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[..., 3]
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[..., 2]
        rcnn_corners_preds[..., 0] = rcnn_corners_preds[..., 0] / image_info[..., 3].unsqueeze(-1)
        rcnn_corners_preds[..., 1] = rcnn_corners_preds[..., 1] / image_info[..., 2].unsqueeze(-1)

        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def forward(self, feed_dict):
    features = self.feature_extractor(feed_dict[constants.KEY_IMAGE])
    y_locs1 = []
    y_locs2 = []
    y_os = []
    y_cls = []
    for i, x in enumerate(features):
        # location out
        loc_feature = self.loc_feature1(x)
        loc1 = self.box_out1(loc_feature)
        N = loc1.size(0)
        loc1 = loc1.permute(0, 2, 3, 1).contiguous()
        loc1 = loc1.view(N, -1, self.num_regress)
        y_locs1.append(loc1)

        loc_feature = torch.cat([x, loc_feature], dim=1)
        loc_feature = self.loc_feature2(loc_feature)
        loc2 = self.box_out2(loc_feature)
        N = loc2.size(0)
        loc2 = loc2.permute(0, 2, 3, 1).contiguous()
        loc2 = loc2.view(N, -1, self.num_regress)
        loc2 += loc1
        y_locs2.append(loc2)

        # os out
        cls_feature = self.cls_feature1(x)
        os_out = self.os_out(cls_feature)
        os_out = os_out.permute(0, 2, 3, 1).contiguous()
        os_out = os_out.view(N, -1, 2)
        y_os.append(os_out)

        cls_feature = torch.cat([x, cls_feature], dim=1)
        cls_feature = self.cls_feature2(cls_feature)
        cls_out = self.cls_out(cls_feature)
        cls_out = cls_out.permute(0, 2, 3, 1).contiguous()
        cls_out = cls_out.view(N, -1, self.num_classes)
        y_cls.append(cls_out)

    loc1_preds = torch.cat(y_locs1, dim=1)
    loc2_preds = torch.cat(y_locs2, dim=1)
    os_preds = torch.cat(y_os, dim=1)
    cls_preds = torch.cat(y_cls, dim=1)

    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    batch_size = loc1_preds.shape[0]
    anchors = self.anchors.cuda()
    anchors = anchors.repeat(batch_size, 1, 1)

    coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
    proposals = coder.decode_batch(loc2_preds, anchors).detach()

    cls_probs = F.softmax(cls_preds.detach(), dim=-1)
    os_probs = F.softmax(os_preds.detach(), dim=-1)[:, :, 1:]
    os_probs[os_probs <= 0.4] = 0
    final_probs = cls_probs * os_probs

    image_info = feed_dict[constants.KEY_IMAGE_INFO].unsqueeze(-1).unsqueeze(-1)
    prediction_dict = {}
    if self.training:
        anchors_dict = {}
        anchors_dict[constants.KEY_PRIMARY] = anchors
        anchors_dict[constants.KEY_BOXES_2D] = loc1_preds
        anchors_dict[constants.KEY_BOXES_2D_REFINE] = loc2_preds
        anchors_dict[constants.KEY_CLASSES] = cls_preds
        anchors_dict[constants.KEY_OBJECTNESS] = os_preds

        gt_dict = {}
        gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
        gt_dict[constants.KEY_CLASSES] = None
        gt_dict[constants.KEY_BOXES_2D] = None
        gt_dict[constants.KEY_OBJECTNESS] = None
        gt_dict[constants.KEY_BOXES_2D_REFINE] = None

        auxiliary_dict = {}
        label_boxes_2d = feed_dict[constants.KEY_LABEL_BOXES_2D]
        if self.normlize_anchor:
            label_boxes_2d[:, :, ::2] = label_boxes_2d[:, :, ::2] / image_info[:, 1]
            label_boxes_2d[:, :, 1::2] = label_boxes_2d[:, :, 1::2] / image_info[:, 0]
        auxiliary_dict[constants.KEY_BOXES_2D] = label_boxes_2d
        gt_labels = feed_dict[constants.KEY_LABEL_CLASSES]
        auxiliary_dict[constants.KEY_CLASSES] = gt_labels
        auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
        auxiliary_dict[constants.KEY_PROPOSALS] = anchors

        proposals_dict, targets, stats = self.target_generators.generate_targets(
            anchors_dict, gt_dict, auxiliary_dict, subsample=False)

        # recall
        anchors_dict[constants.KEY_PRIMARY] = proposals
        _, _, second_stage_stats = self.target_generators.generate_targets(
            anchors_dict, gt_dict, auxiliary_dict, subsample=False)

        # precision
        fg_probs, _ = final_probs[:, :, 1:].max(dim=-1)
        fake_match = auxiliary_dict[constants.KEY_FAKE_MATCH]
        second_stage_stats.update(
            Analyzer.analyze_precision(
                fake_match,
                fg_probs,
                feed_dict[constants.KEY_NUM_INSTANCES],
                thresh=0.3))

        prediction_dict[constants.KEY_STATS] = [stats, second_stage_stats]
        prediction_dict[constants.KEY_TARGETS] = targets
    else:
        prediction_dict[constants.KEY_CLASSES] = final_probs
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3]
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2]
        prediction_dict[constants.KEY_BOXES_2D] = proposals

    return prediction_dict
def forward(self, feed_dict):
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']
    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_ORIENTS_V3] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[constants.KEY_MEAN_DIMS]

            proposals_dict, loss_units, stats = self.target_generators[i].generate_targets(
                proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps, rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
        rcnn_orient_preds = self.rcnn_orient_preds[i](pooled_feat)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1, self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1, self.n_classes)

        if not self.class_agnostic:
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, rcnn_cls_probs.argmax(dim=-1).view(-1))

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_orient_preds = rcnn_orient_preds.view(batch_size, -1, 4 * self.num_bins)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_ORIENTS_V3]['pred'] = rcnn_orient_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_ORIENTS_V3],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)

        # decode for next stage
        coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
        proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS], rcnn_cls_probs).detach()

        coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V3})
        # use rpn proposals to decode
        rcnn_orient_preds = coder.decode_batch(
            rcnn_orient_preds, self.rcnn_orient_loss.bin_centers, proposals,
            feed_dict[constants.KEY_STEREO_CALIB_P2]).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs
        prediction_dict[constants.KEY_ORIENTS_V3] = rcnn_orient_preds

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)

        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

    return prediction_dict
def forward(self, feed_dict):
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']
    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_CORNERS_3D_GRNET] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[i].generate_targets(
                proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps, rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
        rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1, self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1, self.n_classes)

        if not self.class_agnostic:
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds, rcnn_cls_probs.argmax(dim=-1).view(-1))

        # not class_agnostic for dims
        if not self.class_agnostic_3d:
            if self.training:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1),
                    out_c=3)
            else:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds, rcnn_cls_probs.argmax(dim=-1).view(-1), out_c=3)

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[constants.KEY_CORNERS_3D_GRNET]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_CORNERS_3D_GRNET],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)
        else:
            # decode for next stage
            coder = bbox_coders.build({'type': constants.KEY_CORNERS_3D_GRNET})
            rcnn_corners_preds = coder.decode_batch(
                rcnn_corners_preds.detach(), proposals,
                feed_dict[constants.KEY_STEREO_CALIB_P2])

            coder = bbox_coders.build(
                self.target_generators[i].target_generator_config['coder_config'])
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

            coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS], rcnn_cls_probs).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
        prediction_dict[constants.KEY_PROPOSALS] = proposals
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)

        N, M = rcnn_corners_preds.shape[:2]
        rcnn_corners_preds = rcnn_corners_preds.view(N, M, 8, 2)
        rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[:, :, :, 0] / image_info[:, 3]
        rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[:, :, :, 1] / image_info[:, 2]

        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def __init__(self, config):
    self.coder = bbox_coders.build(config)
    self.bg_thresh = config.get('bg_thresh', 0)
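# The coder registry is driven entirely by config['type']. A summary of the
# type keys that appear in the snippets above (not necessarily an exhaustive
# list of what bbox_coders registers):
#   constants.KEY_BOXES_2D, KEY_DIMS, KEY_ORIENTS, KEY_ORIENTS_V2,
#   KEY_ORIENTS_V3, KEY_REAR_SIDE, KEY_CORNERS_3D, KEY_CORNERS_3D_GRNET,
#   KEY_CORNERS_2D_STABLE, KEY_CORNERS_2D_NEAREST_DEPTH,
#   KEY_KEYPOINTS_HEATMAP, KEY_MOBILEYE
# e.g. coder = bbox_coders.build({'type': constants.KEY_DIMS})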