def forward(self, inputs, roidb, im_info):
        """
        Args:
            inputs: a list of [rpn_rois_fpn2, ..., rpn_rois_fpn6,
                               rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
            im_info: [[im_height, im_width, im_scale], ...]
        """
        rois = collect(inputs, self.training)
        if self.training:
            # During training we reuse the data loader code. We populate roidb
            # entries on the fly using the rois generated by RPN.
            im_scales = im_info.data.numpy()[:, 2]
            # For historical consistency with the original Faster R-CNN
            # implementation we are *not* filtering crowd proposals.
            # This choice should be investigated in the future (it likely does
            # not matter).
            json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
            # Compute training labels for the RPN proposals; also handles
            # distributing the proposals over FPN levels
            output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
            blobs = {k: [] for k in output_blob_names}
            roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
        else:
            # For inference we have a special code path that avoids some data
            # loader overhead
            blobs = distribute(rois, None)

        return blobs
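
# The collect/distribute logic above relies on the standard FPN heuristic for
# assigning an RoI to a pyramid level by its area. A minimal sketch of that
# mapping is shown below; the function name, the canonical_scale/canonical_level
# defaults, and the (batch_idx, x1, y1, x2, y2) roi layout are assumptions
# mirroring Detectron's FPN utilities, not code from this repository.
import numpy as np

def map_rois_to_fpn_levels_sketch(rois, k_min, k_max,
                                  canonical_scale=224, canonical_level=4):
    """Assign each RoI to an FPN level: k = floor(k0 + log2(sqrt(area) / 224)),
    clipped to [k_min, k_max]."""
    widths = rois[:, 3] - rois[:, 1] + 1
    heights = rois[:, 4] - rois[:, 2] + 1
    areas = widths * heights
    target_lvls = np.floor(
        canonical_level + np.log2(np.sqrt(areas) / canonical_scale + 1e-6))
    return np.clip(target_lvls, k_min, k_max)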
    def forward(self, rpn_rois, roidb, im_info):
        """Op for generating training labels for RPN proposals. This is used
        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
          - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals
          - 'roidb': roidb entries that will be labeled
          - 'im_info': See GenerateProposals doc.

        blobs_out:
          - (variable set of blobs): returns whatever blobs are required for
            training the model. It does this by querying the data loader for
            the list of blobs that are needed.
        """
        im_scales = im_info.data.numpy()[:, 2]

        output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        # Note: crowd_thresh=0 will ignore _filter_crowd_proposals
        json_dataset.add_proposals(roidb, rpn_rois, im_scales, crowd_thresh=0)
        blobs = {k: [] for k in output_blob_names}
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)

        return blobs
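
# For context, a minimal sketch of the work json_dataset.add_proposals is
# expected to do before the Fast R-CNN sampler runs: split the flat (N, 5)
# rois blob [batch_idx, x1, y1, x2, y2] per image, undo the training-time
# image rescaling, and append the boxes to each roidb entry. This is an
# assumption about the helper's behavior, not its actual implementation; the
# real version also updates fields such as gt_overlaps and box_to_gt_ind_map.
import numpy as np

def add_proposals_sketch(roidb, rois, im_scales):
    for i, entry in enumerate(roidb):
        inds = np.where(rois[:, 0] == i)[0]
        # rois live in the scaled input frame; map back to original image coords
        boxes = rois[inds, 1:5] / im_scales[i]
        entry['boxes'] = np.vstack(
            (entry['boxes'], boxes.astype(entry['boxes'].dtype, copy=False)))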
Example #3
 def forward(self, inputs, outputs):
     """See modeling.detector.CollectAndDistributeFpnRpnProposals for
     inputs/outputs documentation.
     """
     # inputs is
     # [rpn_rois_fpn2, ..., rpn_rois_fpn6,
     #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
     # If training with Faster R-CNN, then inputs will additionally include
     #  + [roidb, im_info]
     rois = self.collect(inputs, self._train)
     if self._train:
         # During training we reuse the data loader code. We populate roidb
         # entries on the fly using the rois generated by RPN.
         # im_info: [[im_height, im_width, im_scale], ...]
         im_info = inputs[-1].data
         im_scales = im_info[:, 2]
         roidb = blob_utils.deserialize(inputs[-2].data)
         # For historical consistency with the original Faster R-CNN
         # implementation we are *not* filtering crowd proposals.
         # This choice should be investigated in the future (it likely does
         # not matter).
         json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
         # Compute training labels for the RPN proposals; also handles
         # distributing the proposals over FPN levels
         output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
         blobs = {k: [] for k in output_blob_names}
         roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
         for i, k in enumerate(output_blob_names):
             blob_utils.py_op_copy_blob(blobs[k], outputs[i])
     else:
         # For inference we have a special code path that avoids some data
         # loader overhead
         self.distribute(rois, None, outputs, self._train)
 def forward(self, inputs, outputs):
     """See modeling.detector.CollectAndDistributeFpnRpnProposals for
     inputs/outputs documentation.
     """
     # inputs is
     # [rpn_rois_fpn2, ..., rpn_rois_fpn6,
     #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
     # If training with Faster R-CNN, then inputs will additionally include
     #  + [roidb, im_info]
     rois = collect(inputs, self._train)
     if self._train:
         # During training we reuse the data loader code. We populate roidb
         # entries on the fly using the rois generated by RPN.
         # im_info: [[im_height, im_width, im_scale], ...]
         im_info = inputs[-1].data
         im_scales = im_info[:, 2]
         roidb = blob_utils.deserialize(inputs[-2].data)
         # For historical consistency with the original Faster R-CNN
         # implementation we are *not* filtering crowd proposals.
         # This choice should be investigated in the future (it likely does
         # not matter).
         json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
         # Compute training labels for the RPN proposals; also handles
         # distributing the proposals over FPN levels
         output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
         blobs = {k: [] for k in output_blob_names}
         roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
         for i, k in enumerate(output_blob_names):
             blob_utils.py_op_copy_blob(blobs[k], outputs[i])
     else:
         # For inference we have a special code path that avoids some data
         # loader overhead
         distribute(rois, None, outputs, self._train)
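
# The Caffe2 ops above pass the Python roidb through the workspace as a blob,
# so it has to be flattened to a numpy array on the way in and restored with
# blob_utils.deserialize on the way out. A minimal sketch of that round trip,
# assuming pickle-based packing (the exact encoding used by blob_utils is an
# assumption here):
import pickle
import numpy as np

def serialize_sketch(obj):
    # Pack an arbitrary Python object (e.g. the roidb list) into a flat uint8
    # array so it can travel through the Caffe2 workspace.
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8)

def deserialize_sketch(arr):
    # Inverse of serialize_sketch: recover the Python object from the blob.
    return pickle.loads(arr.astype(np.uint8).tobytes())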
    def forward(self, rpn_rois, roidb, im_info):
        """Op for generating training labels for RPN proposals. This is used
        when training RPN jointly with Fast/Mask R-CNN (as in end-to-end
        Faster R-CNN training).

        blobs_in:
          - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals
          - 'roidb': roidb entries that will be labeled
          - 'im_info': See GenerateProposals doc.

        blobs_out:
          - (variable set of blobs): returns whatever blobs are required for
            training the model. It does this by querying the data loader for
            the list of blobs that are needed.
        """
        im_scales = im_info.data.numpy()[:, 2]

        output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).
        # Note: crowd_thresh=0 will ignore _filter_crowd_proposals
        json_dataset.add_proposals(roidb, rpn_rois, im_scales, crowd_thresh=0)
        blobs = {k: [] for k in output_blob_names}
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)

        return blobs
    def forward(self, inputs, roidb, im_info):
        """
        Args:
            inputs: a list of [rpn_rois_fpn2, ..., rpn_rois_fpn6,
                               rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
            im_info: [[im_height, im_width, im_scale], ...]
        """
        rois = collect(inputs, self.training)
        if self.training:
            # During training we reuse the data loader code. We populate roidb
            # entries on the fly using the rois generated by RPN.
            im_scales = im_info.data.numpy()[:, 2]
            # For historical consistency with the original Faster R-CNN
            # implementation we are *not* filtering crowd proposals.
            # This choice should be investigated in the future (it likely does
            # not matter).
            json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
            # Compute training labels for the RPN proposals; also handles
            # distributing the proposals over FPN levels
            output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
            blobs = {k: [] for k in output_blob_names}
            roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
        else:
            # For inference we have a special code path that avoids some data
            # loader overhead
            blobs = distribute(rois, None)

        return blobs
def get_new_blobs(rois, im_info, roidb, pos_iou=0):
    im_scales = im_info.data.numpy()[:, 2]
    output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
    json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
    blobs = {k: [] for k in output_blob_names}
    add_fast_rcnn_blobs(blobs, im_scales, roidb, pos_iou)

    return blobs
Example #8
    def forward(self, inputs, roidb, im_info):
        """
        Args:
            inputs: a list of [rpn_rois_fpn2, ..., rpn_rois_fpn6,
                               rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
            im_info: [[im_height, im_width, im_scale], ...]
        """
        rois = collect(inputs, self.training)
        if self.training:
            # During training we reuse the data loader code. We populate roidb
            # entries on the fly using the rois generated by RPN.
            im_scales = im_info.data.numpy()[:, 2]
            # For historical consistency with the original Faster R-CNN
            # implementation we are *not* filtering crowd proposals.
            # This choice should be investigated in the future (it likely does
            # not matter).
            if len(cfg.TRAIN.DATASETS) == 1:
                dataset_name = cfg.TRAIN.DATASETS[0]
                if 'coco' in dataset_name:
                    json_dataset.add_proposals(roidb,
                                               rois,
                                               im_scales,
                                               crowd_thresh=0)
                elif 'davis' in dataset_name:
                    davis_db.add_proposals(roidb,
                                           rois,
                                           im_scales,
                                           crowd_thresh=0)
            else:
                json_dataset.add_proposals(roidb,
                                           rois,
                                           im_scales,
                                           crowd_thresh=0)
            # Compute training labels for the RPN proposals; also handles
            # distributing the proposals over FPN levels
            output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
            blobs = {k: [] for k in output_blob_names}
            if cfg.CONVGRU.DYNAMIC_MODEL:
                assert len(roidb) == 1, (
                    'The batch size should be 1 so the computation graph '
                    'stays the same (images with and without gt must be '
                    'handled consistently).')
                # No gt.
                if not len(np.where(roidb[0]['gt_classes'] > 0)[0]) > 0:
                    return blobs
            roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
        else:
            # For inference we have a special code path that avoids some data
            # loader overhead
            blobs = distribute(rois, None)

        return blobs
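
# A hedged refactor sketch for the dataset branching above: the substring
# checks can be expressed as a small dispatch table. json_dataset and davis_db
# are the proposal-adders already imported by that example; the helper name
# below is hypothetical.
_PROPOSAL_ADDERS = (
    ('davis', davis_db.add_proposals),
    ('coco', json_dataset.add_proposals),
)

def add_proposals_for_dataset(dataset_name, roidb, rois, im_scales):
    for key, adder in _PROPOSAL_ADDERS:
        if key in dataset_name:
            return adder(roidb, rois, im_scales, crowd_thresh=0)
    # Fall back to the COCO-style json_dataset path.
    return json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)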
 def forward(self, inputs, outputs):
     # During training we reuse the data loader code. We populate roidb
     # entries on the fly using the rois generated by RPN.
     # im_info: [[im_height, im_width, im_scale], ...]
     rois = inputs[0].data
     roidb = blob_utils.deserialize(inputs[1].data)
     im_info = inputs[2].data
     im_scales = im_info[:, 2]
     output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
     json_dataset.add_proposals(roidb, rois, im_scales)
     blobs = {k: [] for k in output_blob_names}
     roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
     for i, k in enumerate(output_blob_names):
         blob_utils.py_op_copy_blob(blobs[k], outputs[i])
Example #11
 def forward(self, inputs, outputs):
     """See modeling.detector.GenerateProposalLabels for inputs/outputs
     documentation.
     """
     # During training we reuse the data loader code. We populate roidb
     # entries on the fly using the rois generated by RPN.
     # im_info: [[im_height, im_width, im_scale], ...]
     rois = inputs[0].data
     roidb = blob_utils.deserialize(inputs[1].data)
     im_info = inputs[2].data
     im_scales = im_info[:, 2]
     output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
     # For historical consistency with the original Faster R-CNN
     # implementation we are *not* filtering crowd proposals.
     # This choice should be investigated in the future (it likely does
     # not matter).
     json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)
     blobs = {k: [] for k in output_blob_names}
     roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
     for i, k in enumerate(output_blob_names):
         blob_utils.py_op_copy_blob(blobs[k], outputs[i])
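
# For reference, a minimal sketch of the kind of list that
# roi_data.fast_rcnn.get_fast_rcnn_blob_names() is assumed to return during
# training; the exact set depends on the active config (e.g. FPN, mask and
# keypoint heads), so treat this as an illustration rather than the real API.
def get_fast_rcnn_blob_names_sketch(is_training=True):
    blob_names = ['rois']
    if is_training:
        blob_names += ['labels_int32', 'bbox_targets',
                       'bbox_inside_weights', 'bbox_outside_weights']
    # With FPN enabled, per-level 'rois_fpn<k>' blobs plus
    # 'rois_idx_restore_int32' are appended as well (see Example #13 below).
    return blob_names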
Example #13
    def forward(self, inputs, roidb, im_info):
        """
        Args:
            inputs: a list of [rpn_rois_fpn2, ..., rpn_rois_fpn6,
                               rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6] # (2905, 5)
            im_info: [[im_height, im_width, im_scale], ...]
        """
        num_img = im_info.shape[0]
        rois = collect(inputs, self.training, num_img)

        # During training we reuse the data loader code. We populate roidb
        # entries on the fly using the rois generated by RPN.
        im_scales = im_info.data.numpy()[:, 2]
        # For historical consistency with the original Faster R-CNN
        # implementation we are *not* filtering crowd proposals.
        # This choice should be investigated in the future (it likely does
        # not matter).

        # Add the (roughly 2000 per image) RPN rois to the roidb entries
        json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0)

        # Compute training labels for the RPN proposals; also handles
        # distributing the proposals over FPN levels
        # output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()

        output_blob_names = ['rois']
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
            # Support for FPN multi-level rois without bbox reg isn't
            # implemented (... and may never be implemented)
            k_max = cfg.FPN.ROI_MAX_LEVEL
            k_min = cfg.FPN.ROI_MIN_LEVEL
            # Same format as rois blob, but one per FPN level
            for lvl in range(k_min, k_max + 1):
                output_blob_names += ['rois_fpn' + str(lvl)]
            output_blob_names += ['rois_idx_restore_int32']

        blobs = {k: [] for k in output_blob_names}
        roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)

        return blobs
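
# Example #13 only declares the per-level blob names; a minimal sketch of how
# such blobs could be filled is shown below, assuming the standard Detectron
# scheme: rois are bucketed by target level and an argsort-based restore index
# records how to recover the original ordering. Names are illustrative.
import numpy as np

def add_multilevel_rois_sketch(blobs, rois, target_lvls, lvl_min, lvl_max):
    rois_idx_order = np.empty((0, ), dtype=np.int64)
    for lvl in range(lvl_min, lvl_max + 1):
        idx_lvl = np.where(target_lvls == lvl)[0]
        blobs['rois_fpn' + str(lvl)] = rois[idx_lvl, :]
        rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
    # Permutation that restores the original roi order after the per-level
    # outputs are concatenated back together.
    blobs['rois_idx_restore_int32'] = np.argsort(rois_idx_order).astype(
        np.int32, copy=False)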
Example #14
 def forward(self, inputs, outputs):
     # inputs is
     # [rpn_rois_fpn2, ..., rpn_rois_fpn6,
     #  rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6]
     # If training with Faster R-CNN, then inputs will additionally include
     #  + [roidb, im_info]
     rois = collect(inputs, self._train)
     if self._train:
         # During training we reuse the data loader code. We populate roidb
         # entries on the fly using the rois generated by RPN.
         # im_info: [[im_height, im_width, im_scale], ...]
         im_info = inputs[-1].data
         im_scales = im_info[:, 2]
         roidb = blob_utils.deserialize(inputs[-2].data)
         output_blob_names = roi_data.fast_rcnn.get_fast_rcnn_blob_names()
         json_dataset.add_proposals(roidb, rois, im_scales)
         blobs = {k: [] for k in output_blob_names}
         roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb)
         for i, k in enumerate(output_blob_names):
             blob_utils.py_op_copy_blob(blobs[k], outputs[i])
     else:
         # For inference we have a special code path that avoids some data
         # loader overhead
         distribute(rois, None, outputs, self._train)
Example #15
    def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
        im_data = data
        if self.training:
            # roidb: list, length = batch size
            # 'has_visible_keypoints': bool
            # 'boxes' & 'gt_classes': object bboxes and classes
            # 'segms', 'seg_areas', 'gt_overlaps', 'is_crowd',
            #     'box_to_gt_ind_map': passed through unchanged
            # 'gt_actions': (num_box, 26) action labels
            # 'gt_role_id': (num_box, 26, 2) role ids
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)

        # The original RPN module generates proposals and samples 256 positive/
        # negative examples at a 1:3 ratio for the R-CNN stage. For InteractNet
        # (HOI), cfg.TRAIN.BATCH_SIZE_PER_IM and cfg.TRAIN.FG_FRACTION are set
        # to large values so that every proposal is kept in rpn_ret; the three
        # InteractNet branches then re-sample from rpn_ret.
        # See roi_data/hoi_data.py for more information.
        if not cfg.VCOCO.USE_PRECOMP_BOX:
            rpn_ret = self.RPN(blob_conv, im_info, roidb)
            if cfg.MODEL.VCOCO_ON and self.training:
                # WARNING: always sample the HOI branch before the detection branch when training
                hoi_blob_in = sample_for_hoi_branch(rpn_ret,
                                                    roidb,
                                                    im_info,
                                                    is_training=True)
                # Re-sampling for RCNN head, rpn_ret will be modified inplace
                sample_for_detection_branch(rpn_ret)
        elif self.training:
            json_dataset.add_proposals(roidb,
                                       rois=None,
                                       im_info=im_info.data.numpy(),
                                       crowd_thresh=0)
            hoi_blob_in = sample_for_hoi_branch_precomp_box_train(
                roidb, im_info, is_training=True)
            if hoi_blob_in is None:
                return_dict['losses'] = {}
                return_dict['metrics'] = {}
                return_dict['losses'][
                    'loss_hoi_interaction_action'] = torch.tensor(
                        [0.]).cuda(device_id)
                return_dict['metrics'][
                    'accuracy_interaction_cls'] = torch.tensor(
                        [0.]).cuda(device_id)
                return_dict['losses'][
                    'loss_hoi_interaction_affinity'] = torch.tensor(
                        [0.]).cuda(device_id)
                return_dict['metrics'][
                    'accuracy_interaction_affinity'] = torch.tensor(
                        [0.]).cuda(device_id)
                return return_dict

        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            if cfg.FPN.MULTILEVEL_ROIS:
                blob_conv = blob_conv[-self.num_roi_levels:]
            else:
                blob_conv = blob_conv[-1]

        if not self.training:
            return_dict['blob_conv'] = blob_conv

        if not cfg.VCOCO.USE_PRECOMP_BOX:
            if not cfg.MODEL.RPN_ONLY:
                if cfg.MODEL.SHARE_RES5 and self.training:
                    box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
                else:
                    box_feat = self.Box_Head(blob_conv, rpn_ret)
                cls_score, bbox_pred = self.Box_Outs(box_feat)
            else:
                # TODO: complete the returns for RPN only situation
                pass

        if self.training:
            return_dict['losses'] = {}
            return_dict['metrics'] = {}
            # rpn loss
            if not cfg.VCOCO.USE_PRECOMP_BOX:
                rpn_kwargs.update(
                    dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                         if (k.startswith('rpn_cls_logits')
                             or k.startswith('rpn_bbox_pred'))))
                loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                    **rpn_kwargs)
                if cfg.FPN.FPN_ON:
                    for i, lvl in enumerate(
                            range(cfg.FPN.RPN_MIN_LEVEL,
                                  cfg.FPN.RPN_MAX_LEVEL + 1)):
                        return_dict['losses']['loss_rpn_cls_fpn%d' %
                                              lvl] = loss_rpn_cls[i]
                        return_dict['losses']['loss_rpn_bbox_fpn%d' %
                                              lvl] = loss_rpn_bbox[i]
                else:
                    return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
                    return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

                # bbox loss
                loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                    cls_score, bbox_pred, rpn_ret['labels_int32'],
                    rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
                    rpn_ret['bbox_outside_weights'])
                return_dict['losses']['loss_cls'] = loss_cls
                return_dict['losses']['loss_bbox'] = loss_bbox
                return_dict['metrics']['accuracy_cls'] = accuracy_cls

                if cfg.MODEL.MASK_ON:
                    if getattr(self.Mask_Head, 'SHARE_RES5', False):
                        mask_feat = self.Mask_Head(
                            res5_feat,
                            rpn_ret,
                            roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
                    else:
                        mask_feat = self.Mask_Head(blob_conv, rpn_ret)
                    mask_pred = self.Mask_Outs(mask_feat)
                    # return_dict['mask_pred'] = mask_pred
                    # mask loss
                    loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                        mask_pred, rpn_ret['masks_int32'])
                    return_dict['losses']['loss_mask'] = loss_mask

                if cfg.MODEL.KEYPOINTS_ON:
                    if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                        # No corresponding keypoint head implemented yet (Neither in Detectron)
                        # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                        kps_feat = self.Keypoint_Head(
                            res5_feat,
                            rpn_ret,
                            roi_has_keypoints_int32=rpn_ret[
                                'roi_has_keypoint_int32'])
                    else:
                        kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
                    kps_pred = self.Keypoint_Outs(kps_feat)
                    # return_dict['keypoints_pred'] = kps_pred
                    # keypoints loss
                    if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                        loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                            kps_pred, rpn_ret['keypoint_locations_int32'],
                            rpn_ret['keypoint_weights'])
                    else:
                        loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                            kps_pred, rpn_ret['keypoint_locations_int32'],
                            rpn_ret['keypoint_weights'],
                            rpn_ret['keypoint_loss_normalizer'])
                    return_dict['losses']['loss_kps'] = loss_keypoints

            if cfg.MODEL.VCOCO_ON:
                hoi_blob_out = self.HOI_Head(blob_conv, hoi_blob_in)

                interaction_action_loss, interaction_affinity_loss, \
                interaction_action_accuray_cls, interaction_affinity_cls = self.HOI_Head.loss(
                    hoi_blob_out)

                return_dict['losses'][
                    'loss_hoi_interaction_action'] = interaction_action_loss
                return_dict['metrics'][
                    'accuracy_interaction_cls'] = interaction_action_accuray_cls
                return_dict['losses'][
                    'loss_hoi_interaction_affinity'] = interaction_affinity_loss
                return_dict['metrics'][
                    'accuracy_interaction_affinity'] = interaction_affinity_cls

            # Workaround for a PyTorch 0.4 DataParallel bug: scalar (0-dim)
            # tensors cannot be gathered, so make every loss/metric 1-D
            for k, v in return_dict['losses'].items():
                return_dict['losses'][k] = v.unsqueeze(0)
            for k, v in return_dict['metrics'].items():
                return_dict['metrics'][k] = v.unsqueeze(0)

        else:
            if not cfg.VCOCO.USE_PRECOMP_BOX:
                return_dict['rois'] = rpn_ret['rois']
                return_dict['cls_score'] = cls_score
                return_dict['bbox_pred'] = bbox_pred

        return return_dict
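
# A hedged usage sketch (not the repository's actual training loop) showing
# why every loss/metric is unsqueezed to 1-D above: under nn.DataParallel the
# gather step concatenates one value per GPU, which the caller reduces with
# .mean() before summing into a single scalar loss.
import torch

def train_step_sketch(model, optimizer, data, im_info, roidb, **rpn_kwargs):
    return_dict = model(data, im_info, roidb, **rpn_kwargs)
    total_loss = sum(v.mean() for v in return_dict['losses'].values())
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    return total_loss.item()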