def forward(self, feed_dict):
    """Cascade RCNN forward built on the ``instance_info`` API.

    Runs the backbone and RPN once, then iterates ``self.num_stages``
    RCNN stages; each stage pools ROI features from the shared base
    feature map and applies every attribute head in ``self.branches``.

    Args:
        feed_dict: dict keyed by ``constants.KEY_*``; must at least hold
            the input image tensor. Full schema depends on the
            dataloader — TODO confirm against caller.

    Returns:
        training: ``(losses, stats)`` — NOTE(review): ``stats`` is never
            assigned anywhere in this function, so the training return
            raises ``NameError``; presumably per-stage stats were meant
            to be collected (compare the sibling forward that builds
            ``multi_stage_stats``). Confirm intended source of ``stats``.
        eval: the decoded ``instance`` from the last stage.
    """
    # prediction_dict = {}
    output_dict = {}
    # base model
    base_feat = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'base_feat': base_feat})
    self.add_feat('base_feat', base_feat)

    # rpn model
    output_dict.update(self.rpn_model.forward(feed_dict))
    proposals = output_dict['proposals']

    # NOTE(review): populated nowhere in this variant; dead variable.
    multi_stage_loss_units = []
    for i in range(self.num_stages):
        if self.training:
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            # proposals_dict, loss_units = self.instance.target_generators[
            # i].generate_targets(output_dict, feed_dict, auxiliary_dict)
            losses = self.instance_info.generate_losses(
                output_dict, feed_dict, auxiliary_dict)
            # subsample losses and proposals with the same mask so they
            # stay row-aligned
            losses, subsampled_mask = self.sampler.subsample_losses(losses)
            proposals, _ = self.sampler.subsample_outputs(
                proposals, subsampled_mask)

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        # proposals = proposals_dict[constants.KEY_PRIMARY]
        rois = box_ops.box2rois(proposals)
        pooled_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)
        # global average pool over the two spatial dims -> (rois, C)
        pooled_feat = pooled_feat.mean(3).mean(2)

        # rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
        # rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
        # run every registered attribute head for this stage
        for attr_name in self.branches:
            attr_preds = self.branches[attr_name][i](pooled_feat)
            output_dict[attr_name] = attr_preds

        # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # batch_size = rois.shape[0]
        # rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
        # self.n_classes)
        # rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
        # self.n_classes)
        # rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        # output_dict.update({constants.KEY_})
        if self.training:
            losses.update_from_output(output_dict)

        # decode
        instance = self.instance_info.generate_instance(output_dict)
        # decode for next stage
        # coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
        # proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

    if self.training:
        # prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        # NOTE(review): `stats` is undefined here -> NameError (see
        # docstring); left untouched pending confirmation of intent.
        return losses, stats
    else:
        return instance
def forward(self, feed_dict):
    """Multi-stage RCNN forward predicting class, 2D box, 3D corners
    (GRNet encoding) and dimensions from pyramid features.

    Args:
        feed_dict: dataloader dict keyed by ``constants.KEY_*`` (image,
            image info, calibration, labels during training).

    Returns:
        training: ``(prediction_dict, loss_dict)`` where
            ``prediction_dict`` carries per-stage loss units
            (``KEY_TARGETS``), stats and the final proposals.
        eval: ``prediction_dict`` with decoded classes, 2D corners,
            boxes and dims rescaled to original-image coordinates.
    """
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model: backbone yields separate RPN and RCNN feature maps
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})
    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']

    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict: None entries are filled by the target generator
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_CORNERS_3D_GRNET] = None
            # gt_dict[constants.KEY_CORNERS_VISIBILITY] = None
            # gt_dict[constants.KEY_ORIENTS_V2] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict(used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[
                i].generate_targets(proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1) after second stage; average-pool to a vector
        pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
        rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat)
        # rcnn_visibility_preds = self.rcnn_visibility_preds[i](pooled_feat)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                               self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                             self.n_classes)

        if not self.class_agnostic:
            # select box deltas of the target class (training) or the
            # highest-probability class (eval)
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1))

        # not class_agnostic for dims
        if not self.class_agnostic_3d:
            if self.training:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1),
                    out_c=3)
            else:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1),
                    out_c=3)

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(
            batch_size, rcnn_bbox_preds.shape[1], -1)
        # rcnn_visibility_preds = rcnn_visibility_preds.view(
        # batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[constants.KEY_CORNERS_3D_GRNET][
                'pred'] = rcnn_corners_preds
            # loss_units[constants.KEY_CORNERS_VISIBILITY][
            # 'pred'] = rcnn_visibility_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_CORNERS_3D_GRNET],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)
        else:
            # decode for next stage (eval only: during training the
            # target generator refines the proposals instead)
            coder = bbox_coders.build({
                'type': constants.KEY_CORNERS_3D_GRNET
            })
            rcnn_corners_preds = coder.decode_batch(
                rcnn_corners_preds.detach(), proposals,
                feed_dict[constants.KEY_STEREO_CALIB_P2])
            coder = bbox_coders.build(
                self.target_generators[i]
                .target_generator_config['coder_config'])
            proposals = coder.decode_batch(rcnn_bbox_preds,
                                           proposals).detach()
            coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                rcnn_cls_probs).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
        prediction_dict[constants.KEY_PROPOSALS] = proposals
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        # rescale boxes back to the original image; image_info[:, 3] and
        # [:, 2] appear to be the x/y resize factors — TODO confirm
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::
                                         2] / image_info[:, 3].unsqueeze(
                                             -1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::
                                          2] / image_info[:, 2].unsqueeze(
                                              -1).unsqueeze(-1)

        # rcnn_corners_preds = coder.decode_batch(
        # rcnn_corners_preds.detach(), proposals)
        # rcnn_corners_preds = torch.bmm(
        # feed_dict[constants.KEY_STEREO_CALIB_P2_ORIG],
        # rcnn_corners_preds)
        # assert rcnn_corners_preds.shape[0] == 1
        # rcnn_corners_preds = geometry_utils.torch_points_3d_to_points_2d(
        # rcnn_corners_preds[0].view(-1, 3),
        # feed_dict[constants.KEY_STEREO_CALIB_P2_ORIG][0]).view(-1, 8, 2)

        # reshape to (N, M, 8, 2): 8 projected corners per box
        N, M = rcnn_corners_preds.shape[:2]
        rcnn_corners_preds = rcnn_corners_preds.view(N, M, 8, 2)
        # NOTE(review): dividing (N,M,8) by image_info[:, 3] of shape (N,)
        # broadcasts over the last axis — only valid for N == 1; confirm
        # eval batch size is 1.
        rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[:, :, :,
                                                            0] / image_info[:,
                                                                            3]
        rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[:, :, :,
                                                            1] / image_info[:,
                                                                            2]

        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def forward(self, feed_dict):
    """Multi-stage RCNN forward for class, 2D box, stable 2D corner and
    dimension prediction on pyramid features.

    Fix vs. previous revision: ``prediction_dict[constants.KEY_CORNERS_2D]``
    was assigned twice in a row with the identical value in the eval
    branch; the redundant second assignment is removed (no behavior
    change).

    Args:
        feed_dict: dataloader dict keyed by ``constants.KEY_*`` (image,
            image info, calibration, labels during training).

    Returns:
        training: ``(prediction_dict, loss_dict)``.
        eval: ``prediction_dict`` with classes, corners, boxes and dims
        rescaled to original-image coordinates.
    """
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model: separate feature maps for the RPN and the RCNN heads
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})
    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']

    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict: None entries are filled by the target generator
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_CORNERS_2D] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict(used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[
                i].generate_targets(proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1) after the second stage; average-pool to a vector
        pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
        rcnn_corners_preds = self.rcnn_corners_preds[i](
            pooled_feat_for_corners)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                               self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                             self.n_classes)

        if not self.class_agnostic:
            # keep the box deltas of the target class (training) or the
            # highest-probability class (eval)
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1))

        # not class_agnostic for dims
        if not self.class_agnostic_3d:
            if self.training:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1),
                    out_c=3)
            else:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1),
                    out_c=3)

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(
            batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[
                constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_CORNERS_2D],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)

        # decode for next stage
        # corners decode
        corners_coder = bbox_coders.build(
            {'type': constants.KEY_CORNERS_2D_STABLE})
        rcnn_corners_preds = corners_coder.decode_batch(
            rcnn_corners_preds.detach(), proposals)
        # bbox decode
        boxes_coder = bbox_coders.build(
            self.target_generators[i].
            target_generator_config['coder_config'])
        proposals = boxes_coder.decode_batch(rcnn_bbox_preds,
                                             proposals).detach()
        # dims decode
        dims_coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = dims_coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
            rcnn_cls_probs).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        # undo the dataloader resize; image_info[..., 3]/[..., 2] appear
        # to hold the x/y scale factors — TODO confirm against dataloader
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        image_info = image_info.unsqueeze(1).unsqueeze(1)
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[..., 3]
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[..., 2]
        rcnn_corners_preds[..., 0] = rcnn_corners_preds[
            ..., 0] / image_info[..., 3].unsqueeze(-1)
        rcnn_corners_preds[..., 1] = rcnn_corners_preds[
            ..., 1] / image_info[..., 2].unsqueeze(-1)

        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
        # (a second, redundant KEY_CORNERS_2D assignment was removed here)

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def forward(self, feed_dict):
    """Multi-stage RCNN forward predicting class, 2D box, multi-bin
    orientation (ORIENTS_V3) and dimensions from pyramid features.

    Unlike the sibling forwards, this variant does not call
    ``self.loss``; the trainer presumably computes losses from
    ``prediction_dict[constants.KEY_TARGETS]`` — TODO confirm.

    Returns:
        ``prediction_dict`` in both modes (targets/stats when training;
        decoded, rescaled outputs when evaluating).
    """
    # import ipdb
    # ipdb.set_trace()
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model: separate RPN and RCNN feature maps
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})
    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']

    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict: None entries are filled by the target generator
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.
                                                       KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_ORIENTS_V3] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict(used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                constants.KEY_MEAN_DIMS]

            proposals_dict, loss_units, stats = self.target_generators[
                i].generate_targets(proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1) after second stage; average-pool to a vector
        pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
        rcnn_orient_preds = self.rcnn_orient_preds[i](pooled_feat)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                               self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                             self.n_classes)

        if not self.class_agnostic:
            # import ipdb
            # ipdb.set_trace()
            # select box deltas of the target class (training) or the
            # highest-probability class (eval)
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1))

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        # 4 values per orientation bin
        rcnn_orient_preds = rcnn_orient_preds.view(batch_size, -1,
                                                   4 * self.num_bins)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_ORIENTS_V3]['pred'] = rcnn_orient_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            # import ipdb
            # ipdb.set_trace()
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_ORIENTS_V3],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)

        # decode for next stage
        coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
        # rpn_proposals = proposals
        proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()
        coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
            rcnn_cls_probs).detach()
        coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V3})
        # use rpn proposals to decode
        rcnn_orient_preds = coder.decode_batch(
            rcnn_orient_preds, self.rcnn_orient_loss.bin_centers, proposals,
            feed_dict[constants.KEY_STEREO_CALIB_P2]).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs
        # NOTE(review): KEY_ORIENTS_V3 is assigned again unconditionally
        # below, making this assignment redundant — confirm which was
        # intended before cleaning up.
        prediction_dict[constants.KEY_ORIENTS_V3] = rcnn_orient_preds

        # rescale boxes back to the original image; image_info[:, 3] and
        # [:, 2] appear to be the x/y resize factors — TODO confirm
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::
                                         2] / image_info[:, 3].unsqueeze(
                                             -1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::
                                          2] / image_info[:, 2].unsqueeze(
                                              -1).unsqueeze(-1)

    prediction_dict[constants.KEY_BOXES_2D] = proposals
    prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
    prediction_dict[constants.KEY_ORIENTS_V3] = rcnn_orient_preds

    return prediction_dict
def forward(self, feed_dict):
    """Cascade forward built on the ``instance_info``/sampler API.

    Runs backbone + RPN, then per stage: (training only) builds and
    subsamples loss units, pools ROI features, applies all attribute
    branches and regenerates the instance.

    Returns:
        ``(instance, losses_dict, multi_stage_stats)``; the last two are
        ``None`` when evaluating. ``instance`` is rescaled to
        original-image coordinates via ``affine_transform``.
    """
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    auxiliary_dict = {}
    output_dict = {}
    losses_dict = {}

    # TODO move all auxiliary item from feed_dict to auxiliary_dict
    # before get data from dataloader
    if feed_dict.get(constants.KEY_STEREO_CALIB_P2) is not None:
        auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
            constants.KEY_STEREO_CALIB_P2]

    # base model
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'base_feat': rpn_feat_maps})

    # rpn model
    # prediction_dict.update(self.rpn_model.forward(feed_dict))
    instance, rpn_losses = self.rpn_model.forward(feed_dict)
    auxiliary_dict.update(instance)
    proposals = auxiliary_dict[constants.KEY_BOXES_2D]

    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            losses_units = self.instance_info.generate_losses(
                output_dict, feed_dict, auxiliary_dict)
            # NOTE(review): subsample_instance is called here with two
            # different signatures (one arg returning a mask, then two
            # args); sibling forwards use subsample_losses +
            # subsample_outputs — confirm this API is intentional.
            losses_units, subsampled_mask = self.sampler.subsample_instance(
                losses_units)
            proposals, _ = self.sampler.subsample_instance(
                proposals, subsampled_mask)

            # update auxiliary dict
            # TODO subsample for all auxiliary_dict
            auxiliary_dict[constants.KEY_BOXES_2D] = proposals
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            multi_stage_stats.append(
                self.instance_info.generate_stats(auxiliary_dict))

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1) after second stage; average-pool to a vector
        pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        # collect output from network to output_dict
        for attr_name in self.branches:
            attr_preds = self.branches[attr_name][i](pooled_feat)
            output_dict[attr_name] = attr_preds

        # unsqueeze before calc loss
        batch_size = rois.shape[0]
        output_dict = self.instance_info.unsqueeze(output_dict, batch_size)

        if self.training:
            losses_units.update_from_output(output_dict)

        # decode
        instance = self.instance_info.generate_instance(
            output_dict, auxiliary_dict)

    if self.training:
        # combine RPN losses with the RCNN losses of the final stage
        losses_dict.update(rpn_losses)
        losses = self.instance_info.calc_loss(losses_units)
        losses_dict.update(losses)
    else:
        losses_dict = None
        multi_stage_stats = None

    # rescale the decoded instance back to original-image coordinates
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    instance = self.instance_info.affine_transform(instance, im_info)

    return instance, losses_dict, multi_stage_stats
def forward(self, feed_dict):
    """Multi-stage RCNN forward for the mobileye-style corner encoding
    with an auxiliary keypoint heatmap head.

    Returns:
        training: ``(prediction_dict, loss_dict)``.
        eval: ``prediction_dict`` with classes, decoded corners, the
        keypoint heatmap and rescaled boxes. NOTE(review): the dims
        output (``KEY_DIMS``) is decoded but its assignment is commented
        out, so it is never returned — confirm whether intentional.
    """
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # base model: separate RPN and RCNN feature maps
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})
    # rpn model
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']

    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):
        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict: None entries are filled by the target generator
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_MOBILEYE] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict(used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[
                i].generate_targets(proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois.view(-1, 5),
                                                im_info[0][:2])

        # shape(N,C,1,1)
        pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
            pooled_feat)
        # pooled_feat_for_keypoint = F.upsample_bilinear(
        # pooled_feat_for_corners, size=(14, 14))

        # keypoint head runs on the raw pooled features, then the
        # heatmap is collapsed along the vertical axis
        keypoint_map = self.keypoint_predictor(pooled_feat)
        # keypoint_map = self.rcnn_keypoint_preds(pooled_feat_for_keypoint)
        keypoint_map = keypoint_map.mean(-2)
        # keypoint_map = F.softmax(keypoint_map, dim=-1)

        pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
        rcnn_corners_preds = self.rcnn_corners_preds[i](
            pooled_feat_for_corners)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                               self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                             self.n_classes)

        if not self.class_agnostic:
            # import ipdb
            # ipdb.set_trace()
            # select box deltas of the target class (training) or the
            # highest-probability class (eval)
            if self.training:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1))
            else:
                rcnn_bbox_preds = self.squeeze_bbox_preds(
                    rcnn_bbox_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1))

        # not class_agnostic for dims
        # import ipdb
        # ipdb.set_trace()
        if not self.class_agnostic_3d:
            if self.training:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    loss_units[constants.KEY_CLASSES]['target'].view(-1),
                    out_c=3)
            else:
                rcnn_dim_preds = self.squeeze_bbox_preds(
                    rcnn_dim_preds,
                    rcnn_cls_probs.argmax(dim=-1).view(-1),
                    out_c=3)

        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(
            batch_size, rcnn_bbox_preds.shape[1], -1)
        # rcnn_depth_preds = rcnn_depth_preds.view(
        # batch_size, rcnn_bbox_preds.shape[1], -1)
        # rcnn_center_depth_preds = rcnn_center_depth_preds.view(
        # batch_size, rcnn_bbox_preds.shape[1], -1)
        # concat them(depth and corners)
        # rcnn_corners_preds = torch.cat(
        # [rcnn_corners_preds, rcnn_depth_preds], dim=-1)
        # # append center depth
        # rcnn_corners_preds = torch.cat(
        # [rcnn_corners_preds, rcnn_center_depth_preds], dim=-1)
        # rcnn_visibility_preds = rcnn_visibility_preds.view(
        # batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        # decode for next stage
        coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
            rcnn_cls_probs).detach()

        # rcnn_corners_preds = coder.decode_batch(
        # rcnn_corners_preds.detach(), proposals)
        # import ipdb
        # ipdb.set_trace()
        # if self.training_depth:
        # # predict for depth
        # rois = box_ops.box2rois(proposals)
        # pooled_feat_for_depth = self.pyramid_rcnn_pooling(
        # rcnn_feat_maps, rois.view(-1, 5), im_info[0][:2])
        # shape(N,C,1,1)
        # pooled_feat_for_depth = self.third_stage_feature(pooled_feat)
        # pooled_feat_for_depth = pooled_feat_for_depth.mean(3).mean(2)
        # rcnn_depth_preds = self.rcnn_depth_preds[i](pooled_feat_for_depth)
        # encode
        # rcnn_depth_preds = 1 / (rcnn_depth_preds.sigmoid() + 1e-6) - 1
        # rcnn_depth_preds = rcnn_depth_preds.view(
        # batch_size, rcnn_bbox_preds.shape[1], -1)
        # # concat them(depth and corners)
        # rcnn_corners_preds = self.fuse_corners_and_depth(
        # rcnn_corners_preds, rcnn_depth_preds)
        # rcnn_corners_preds = torch.cat(
        # [rcnn_corners_preds, rcnn_depth_preds], dim=-1)
        # # # append center depth
        # rcnn_corners_preds = torch.cat(
        # [rcnn_corners_preds, rcnn_center_depth_preds], dim=-1)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[constants.KEY_MOBILEYE]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_MOBILEYE],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)

        # decode the mobileye corner parameterization with the keypoint
        # heatmap as auxiliary input
        coder = bbox_coders.build({'type': constants.KEY_MOBILEYE})
        rcnn_corners_preds = coder.decode_batch(rcnn_corners_preds.detach(),
                                                proposals,
                                                keypoint_map.detach())
        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_KEYPOINTS_HEATMAP] = keypoint_map

        # if self.training:
        # corners_2d_gt = coder.decode_batch(
        # loss_units[constants.KEY_MOBILEYE]['target'], proposals)
        # prediction_dict['corners_2d_gt'] = corners_2d_gt

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        # decode boxes for the final output (eval only); `i` is the last
        # stage index from the loop above
        coder = bbox_coders.build(self.target_generators[i].
                                  target_generator_config['coder_config'])
        proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        # rescale boxes/corners back to the original image;
        # image_info[:, 3]/[:, 2] appear to be x/y resize factors — TODO
        # confirm against the dataloader
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::
                  2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(
                      -1).unsqueeze(-1)
        proposals[:, :,
                  1::2] = proposals[:, :,
                                    1::2] / image_info[:, 2].unsqueeze(
                                        -1).unsqueeze(-1)
        rcnn_corners_preds[:, :, :,
                           0] = rcnn_corners_preds[:, :, :,
                                                   0] / image_info[:, None,
                                                                   None, 3]
        rcnn_corners_preds[:, :, :,
                           1] = rcnn_corners_preds[:, :, :,
                                                   1] / image_info[:, None,
                                                                   None, 2]

        prediction_dict[constants.KEY_BOXES_2D] = proposals
        # prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
        # prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def forward(self, feed_dict):
    """Plain 2D cascade RCNN forward pass.

    Backbone -> RPN -> ``self.num_stages`` RCNN stages. During training
    each stage generates targets from the current proposals before
    pooling; proposals are refined between stages by decoding the box
    regression. During eval the final boxes are rescaled back to
    original-image coordinates.

    Returns:
        training: ``(prediction_dict, loss_dict)``.
        eval: ``prediction_dict`` with classes and rescaled boxes.
    """
    # import ipdb
    # ipdb.set_trace()
    prediction_dict = {}

    # backbone features shared by the RPN and every RCNN stage
    base_feat = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'base_feat': base_feat})
    self.add_feat('base_feat', base_feat)

    # region proposal network
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']

    stage_loss_units = []
    stage_stats = []
    for stage in range(self.num_stages):
        if self.training:
            proposals_dict = {constants.KEY_PRIMARY: proposals}
            # None entries are filled in by the target generator
            gt_dict = {
                constants.KEY_PRIMARY:
                feed_dict[constants.KEY_LABEL_BOXES_2D],
                constants.KEY_CLASSES: None,
                constants.KEY_BOXES_2D: None,
            }
            auxiliary_dict = {
                constants.KEY_BOXES_2D:
                feed_dict[constants.KEY_LABEL_BOXES_2D],
                constants.KEY_CLASSES:
                feed_dict[constants.KEY_LABEL_CLASSES],
                constants.KEY_NUM_INSTANCES:
                feed_dict[constants.KEY_NUM_INSTANCES],
                constants.KEY_PROPOSALS: proposals,
            }
            proposals_dict, loss_units, stats = self.target_generators[
                stage].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)
            proposals = proposals_dict[constants.KEY_PRIMARY]

        # pool ROI features: base_feat (N,C,H,W), rois (N*num_props, 5)
        rois = box_ops.box2rois(proposals)
        roi_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5))
        roi_feat = self.feature_extractor.second_stage_feature(roi_feat)
        # global average pool -> one feature vector per ROI
        roi_feat = roi_feat.mean(3).mean(2)

        bbox_deltas = self.rcnn_bbox_preds[stage](roi_feat)
        cls_scores = self.rcnn_cls_preds[stage](roi_feat)
        cls_probs = F.softmax(cls_scores, dim=1)

        num_images = rois.shape[0]
        cls_scores = cls_scores.view(num_images, -1, self.n_classes)
        cls_probs = cls_probs.view(num_images, -1, self.n_classes)

        if not self.class_agnostic:
            # keep only the regression of the relevant class: the target
            # class when training, the argmax class when evaluating
            if self.training:
                chosen_classes = loss_units[
                    constants.KEY_CLASSES]['target'].view(-1)
            else:
                chosen_classes = cls_probs.argmax(dim=-1).view(-1)
            bbox_deltas = self.squeeze_bbox_preds(bbox_deltas,
                                                  chosen_classes)

        bbox_deltas = bbox_deltas.view(num_images, -1, 4)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = bbox_deltas
            stage_loss_units.extend([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D]
            ])
            stage_stats.append(stats)

        # refine proposals for the next stage
        coder = bbox_coders.build(self.target_generators[stage].
                                  target_generator_config['coder_config'])
        proposals = coder.decode_batch(bbox_deltas, proposals).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = stage_loss_units
        prediction_dict[constants.KEY_STATS] = stage_stats
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict

    # eval: undo the dataloader resize recorded in image_info
    prediction_dict[constants.KEY_CLASSES] = cls_probs
    image_info = feed_dict[constants.KEY_IMAGE_INFO]
    x_scale = image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
    y_scale = image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
    proposals[:, :, ::2] = proposals[:, :, ::2] / x_scale
    proposals[:, :, 1::2] = proposals[:, :, 1::2] / y_scale
    prediction_dict[constants.KEY_BOXES_2D] = proposals
    return prediction_dict
def forward(self, feed_dict):
    """Two-stage monocular 3D detector forward pass.

    On top of the cascaded 2D box/class heads this predicts, per RoI,
    projected 3D corners, dimensions and depth, decoding them with the
    camera calibration (P2).

    Args:
        feed_dict: dict keyed by ``constants.KEY_*`` with image,
            calibration, labels (training) and image meta info.

    Returns:
        Training: ``(prediction_dict, loss_dict)``.
        Inference: ``prediction_dict`` with classes, 2D boxes, decoded
        corners, dimensions and center depth.
    """
    im_info = feed_dict[constants.KEY_IMAGE_INFO]
    prediction_dict = {}

    # Backbone: pyramid feature maps, separate sets for RPN and R-CNN.
    rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

    # RPN: adds 'proposals' (and related outputs) to prediction_dict.
    prediction_dict.update(self.rpn_model.forward(feed_dict))
    proposals = prediction_dict['proposals']

    multi_stage_loss_units = []
    multi_stage_stats = []
    for i in range(self.num_stages):

        if self.training:
            # proposals_dict
            proposals_dict = {}
            proposals_dict[constants.KEY_PRIMARY] = proposals

            # gt_dict: target slots to be filled in by the target generator.
            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_CORNERS_2D] = None
            gt_dict[constants.KEY_DIMS] = None

            # auxiliary_dict (used for encoding)
            auxiliary_dict = {}
            auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                constants.KEY_STEREO_CALIB_P2]
            auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                constants.KEY_LABEL_BOXES_3D]
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = proposals
            auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                constants.KEY_MEAN_DIMS]
            auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                constants.KEY_IMAGE_INFO]

            proposals_dict, loss_units, stats = self.target_generators[
                i].generate_targets(proposals_dict, gt_dict, auxiliary_dict)

            # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
            # NOTE(review): kept inside the training branch because
            # proposals_dict is only defined here.
            proposals = proposals_dict[constants.KEY_PRIMARY]

        rois = box_ops.box2rois(proposals)
        # Pool from the pyramid level matching each RoI's scale.
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois.view(-1, 5),
                                                im_info[0][:2])

        # Second-stage feature, then global average pool; shape (N,C,1,1)
        # before the pool, (N,C) after.
        pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

        # Per-stage heads: 2D box deltas, class scores, corners, dims.
        rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
        rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
        rcnn_corners_preds = self.rcnn_corners_preds[i](
            pooled_feat_for_corners)
        rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        batch_size = rois.shape[0]
        rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                               self.n_classes)
        rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                             self.n_classes)
        rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
        rcnn_corners_preds = rcnn_corners_preds.view(
            batch_size, rcnn_bbox_preds.shape[1], -1)
        rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

        rcnn_depth_preds = self.rcnn_depth_preds[i](
            pooled_feat_for_corners)
        rcnn_depth_preds = rcnn_depth_preds.view(batch_size,
                                                 rcnn_bbox_preds.shape[1],
                                                 -1)
        # Merge per-corner predictions with the predicted depth.
        rcnn_corners_preds = self.fuse_corners_and_depth(
            rcnn_corners_preds, rcnn_depth_preds)

        if self.training:
            loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
            loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
            loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
            loss_units[
                constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
            multi_stage_loss_units.append([
                loss_units[constants.KEY_CLASSES],
                loss_units[constants.KEY_BOXES_2D],
                loss_units[constants.KEY_CORNERS_2D],
                loss_units[constants.KEY_DIMS]
            ])
            multi_stage_stats.append(stats)
        else:
            # Last channel of the fused corner prediction appears to be
            # the center depth — TODO(review) confirm against
            # fuse_corners_and_depth.
            center_depth = rcnn_corners_preds[:, :, -1:]
            coder = bbox_coders.build(
                {'type': constants.KEY_CORNERS_2D_NEAREST_DEPTH})
            rcnn_corners_preds = coder.decode_batch(
                rcnn_corners_preds.detach(), proposals,
                feed_dict[constants.KEY_STEREO_CALIB_P2])

        # Decode refined 2D boxes for the next cascade stage / final
        # output; detach so gradients do not flow across stages.
        coder = bbox_coders.build(self.target_generators[i].
                                  target_generator_config['coder_config'])
        proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        # Decode dimensions against the per-class mean dimensions.
        coder = bbox_coders.build({'type': constants.KEY_DIMS})
        rcnn_dim_preds = coder.decode_batch(
            rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
            rcnn_cls_probs).detach()

    if self.training:
        prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
        prediction_dict[constants.KEY_STATS] = multi_stage_stats
    else:
        prediction_dict[constants.KEY_CENTER_DEPTH] = center_depth
        prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

        # Rescale 2D boxes back to original image coordinates (in place).
        # image_info[:, 3] / image_info[:, 2] are presumably the x / y
        # resize scale factors — TODO(review) confirm.
        image_info = feed_dict[constants.KEY_IMAGE_INFO]
        proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[
            :, 3].unsqueeze(-1).unsqueeze(-1)
        proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[
            :, 2].unsqueeze(-1).unsqueeze(-1)

        # Rescale projected corners.
        # NOTE(review): image_info[:, None, None, 3:4] is 4-D while the
        # indexed slice is 3-D; this broadcasts as intended only when the
        # batch size is 1 — confirm for multi-image batches.
        rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[
            :, :, :, 0] / image_info[:, None, None, 3:4]
        rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[
            :, :, :, 1] / image_info[:, None, None, 2:3]
        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_BOXES_2D] = proposals
        prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
        # NOTE(review): duplicate assignment kept from the original; it is
        # redundant with the KEY_CORNERS_2D assignment above.
        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds

    if self.training:
        loss_dict = self.loss(prediction_dict, feed_dict)
        return prediction_dict, loss_dict
    else:
        return prediction_dict
def forward(self, feed_dict):
    """Forward pass of the instance_info-driven two-stage detector.

    The RPN supplies initial instances/losses; each cascade stage builds
    loss units, runs the per-attribute branch heads, and decodes an
    instance via ``self.instance_info``.

    Args:
        feed_dict: dict keyed by ``constants.KEY_*`` with image, labels
            (training) and image meta info.

    Returns:
        Tuple ``(instance, losses_dict, multi_stage_stats)``; the last
        two are ``None`` at inference time.
    """
    output_dict = {}
    losses_dict = UncoverDict()
    auxiliary_dict = {}

    multi_stage_stats = []

    # Backbone: first-stage feature map shared by the RPN and RCNN heads.
    base_feat = self.feature_extractor.first_stage_feature(
        feed_dict[constants.KEY_IMAGE])
    feed_dict.update({'base_feat': base_feat})

    # RPN: returns its decoded instance and its losses.
    instance, losses = self.rpn_model.forward(feed_dict)
    auxiliary_dict.update(instance)
    proposals = auxiliary_dict[constants.KEY_BOXES_2D]

    for i in range(self.num_stages):
        if self.training:
            # Build loss units for this stage, then subsample them and the
            # proposals with the same mask.
            # NOTE(review): subsample_instance is called here once with
            # one argument and once with two — confirm the sampler API
            # supports both call shapes.
            losses_units = self.instance_info.generate_losses(
                output_dict, feed_dict, auxiliary_dict)
            losses_units, subsampled_mask = self.sampler.subsample_instance(
                losses_units)
            proposals, _ = self.sampler.subsample_instance(
                proposals, subsampled_mask)

            # update auxiliary dict
            # TODO subsample for all auxiliary_dict
            auxiliary_dict[constants.KEY_BOXES_2D] = proposals
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]

            multi_stage_stats.append(
                self.instance_info.generate_stats(auxiliary_dict))

        rois = box_ops.box2rois(proposals)
        # 1/16 is presumably the feature-map stride w.r.t. the input
        # image — TODO(review) confirm against the backbone config.
        pooled_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5),
                                        1 / 16)

        # Second-stage feature, then global average pool; shape (N,C,1,1)
        # before the pool, (N,C) after.
        pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat = pooled_feat.mean(3).mean(2)

        # collect output from network to output_dict
        for attr_name in self.branches:
            attr_preds = self.branches[attr_name][i](pooled_feat)
            output_dict[attr_name] = attr_preds

        # unsqueeze before calc loss (restore the batch dimension).
        batch_size = rois.shape[0]
        output_dict = self.instance_info.unsqueeze(output_dict, batch_size)

        if self.training:
            losses_units.update_from_output(output_dict)

        # Decode the current predictions into an instance; in a cascade
        # this also seeds the next stage.
        instance = self.instance_info.generate_instance(
            output_dict, auxiliary_dict)

    if self.training:
        # Accumulate RPN losses first, then the RCNN stage losses.
        losses_dict.update(losses)
        losses = self.instance_info.calc_loss(losses_units)
        losses_dict.update(losses)
    else:
        losses_dict = None
        multi_stage_stats = None
        # Rescale the decoded instance back to original image coordinates.
        im_info = feed_dict[constants.KEY_IMAGE_INFO]
        instance = self.instance_info.affine_transform(instance, im_info)

    return instance, losses_dict, multi_stage_stats