Example #1
    def gt_boxes(self, fmap, im_sizes, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None,
                 train_anchor_inds=None, proposals=None):
        """
        Gets GT boxes!
        :param fmap:
        :param im_sizes:
        :param image_offset:
        :param gt_boxes:
        :param gt_classes:
        :param gt_rels:
        :param train_anchor_inds:
        :return:
        """
        assert gt_boxes is not None
        im_inds = gt_classes[:, 0] - image_offset
        rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
        if gt_rels is not None and self.training:
            rois, labels, rel_labels = proposal_assignments_gtbox(
                rois.data, gt_boxes.data, gt_classes.data, gt_rels.data, image_offset,
                fg_thresh=0.5)
        else:
            labels = gt_classes[:, 1]
            rel_labels = None

        return rois, labels, None, None, None, rel_labels
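
The pattern to note here is how a flat batch of boxes stays attributable to its source image: each box's image index is prepended as an extra ROI column. A minimal, self-contained sketch of that construction (the tensors below are invented for illustration):

import torch

# Hypothetical batch: three GT boxes spread over two images.
gt_classes = torch.tensor([[0, 5], [0, 7], [1, 5]])   # (img_id, class) per box
gt_boxes = torch.tensor([[10., 20., 50., 60.],
                         [30., 40., 80., 90.],
                         [ 5., 15., 45., 55.]])        # (x1, y1, x2, y2) per box

image_offset = 0                                       # 0 on a single GPU
im_inds = gt_classes[:, 0] - image_offset              # image index per box
rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
print(rois.shape)                                      # torch.Size([3, 5])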
Example #2
    def gt_boxes(self, image_offset, gt_boxes=None, gt_classes=None, gt_rels=None):
        """
        Gets Ground-Truth boxes.
        :param image_offset: offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param gt_rels: GT relations over the batch
        :return rois, labels, rel_labels
        """
        assert gt_boxes is not None
        im_inds = gt_classes[:, 0] - image_offset
        rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
        if gt_rels is not None and self.training:
            rois, labels, rel_labels = proposal_assignments_gtbox(
                rois.data, gt_boxes.data, gt_classes.data, gt_rels.data, image_offset,
                fg_thresh=0.5)
        else:
            labels = gt_classes[:, 1]
            rel_labels = None

        return rois, labels, rel_labels
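
proposal_assignments_gtbox is imported from elsewhere in this codebase and is not shown here. When the ROIs are the GT boxes themselves, its essential job is to keep the ROIs, copy the GT classes as labels, and emit relation labels. A much-simplified stand-in under the assumption that gt_rels rows are (img_ind, subj_box_ind, obj_box_ind, predicate); the real function also handles multi-GPU image offsets and relation sampling:

import torch

def gtbox_assignments_sketch(rois, gt_classes, gt_rels, image_offset=0):
    # Every ROI is a GT box, so it keeps its GT class.
    labels = gt_classes[:, 1]
    # Shift image indices so they are local to this GPU's slice
    # (assumed gt_rels layout: img_ind, subj_ind, obj_ind, predicate).
    rel_labels = gt_rels.clone()
    rel_labels[:, 0] -= image_offset
    return rois, labels, rel_labels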
Example #3
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_masks=None,
                gt_classes=None,
                gt_rels=None,
                pred_boxes=None,
                pred_masks=None,
                pred_fmaps=None,
                pred_dists=None):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes:

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
            scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels

            if test:
            prob dists, boxes, img inds, maxscores, classes

        """

        result = Result()
        if self.training:
            im_inds = gt_classes[:, 0]
            rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
            rois, labels, result.rel_labels = proposal_assignments_gtbox(
                rois.data, gt_boxes.data, gt_classes.data, gt_rels.data,
                image_offset)
            pred_boxes = gt_boxes
            pred_masks = gt_masks
            result.rm_obj_labels = gt_classes[:, 1]
        else:
            im_inds = pred_boxes[:, 0].long()
            pred_boxes = pred_boxes[:, 1:]
            result.rel_dists = None

        rel_inds = self.get_rel_inds(result.rel_labels, im_inds, pred_boxes)
        rois = torch.cat((im_inds[:, None].float(), pred_boxes), 1)
        visual_rep = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:])
        result.obj_fmap = self.obj_feature_map(pred_fmaps, rois)
        # Now do the approximation WHEREVER THERE'S A VALID RELATIONSHIP.
        result.rm_obj_dists, result.rel_dists = self.message_pass(
            F.relu(self.edge_unary(visual_rep)),
            self.obj_unary(result.obj_fmap), rel_inds[:, 1:])

        # result.box_deltas_update = box_deltas

        if self.training:
            return result
        scores_nz = F.softmax(result.rm_obj_dists, dim=1).data
        scores_nz[:, 0] = 0.0
        result.obj_scores, score_ord = scores_nz[:, 1:].sort(dim=1,
                                                             descending=True)
        result.obj_preds = score_ord[:, 0] + 1
        result.obj_scores = result.obj_scores[:, 0]
        # # Decode here ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # if self.mode == 'predcls':
        #     # Hack to get the GT object labels
        #     result.obj_scores = result.rm_obj_dists.data.new(gt_classes.size(0)).fill_(1)
        #     result.obj_preds = gt_classes.data[:, 1]
        # elif self.mode == 'sgdet':
        #     order, obj_scores, obj_preds = filter_det(F.softmax(result.rm_obj_dists),
        #                                               pred_boxes,
        #                                               start_ind=0,
        #                                               max_per_img=100,
        #                                               thresh=0.00,
        #                                               pre_nms_topn=6000,
        #                                               post_nms_topn=300,
        #                                               nms_thresh=0.3,
        #                                               nms_filter_duplicates=True)
        #     idx, perm = torch.sort(order)
        #     result.obj_preds = rel_inds.new(result.rm_obj_dists.size(0)).fill_(1)
        #     result.obj_scores = result.rm_obj_dists.data.new(result.rm_obj_dists.size(0)).fill_(0)
        #     result.obj_scores[idx] = obj_scores.data[perm]
        #     result.obj_preds[idx] = obj_preds.data[perm]
        # else:
        #     scores_nz = F.softmax(result.rm_obj_dists).data
        #     scores_nz[:, 0] = 0.0
        #     result.obj_scores, score_ord = scores_nz[:, 1:].sort(dim=1, descending=True)
        #     result.obj_preds = score_ord[:, 0] + 1
        #     result.obj_scores = result.obj_scores[:, 0]

        result.obj_preds = Variable(result.obj_preds)
        result.obj_scores = Variable(result.obj_scores)

        # Set result's bounding boxes to be size
        # [num_boxes, topk, 4] instead of considering every single object assignment.
        # twod_inds = arange(result.obj_preds.data) * self.num_classes + result.obj_preds.data
        #
        # if self.mode == 'sgdet':
        #     bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)
        # else:
        #     # Boxes will get fixed by filter_dets function.
        #     bboxes = result.rm_box_priors
        rel_rep = F.softmax(result.rel_dists, dim=1)

        return filter_dets_mask(pred_boxes, pred_masks, result.obj_scores,
                                result.obj_preds, rel_inds[:, 1:], rel_rep)
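
The test-time decode above zeroes out the background column of the softmax before taking a per-box argmax, which guarantees class 0 is never predicted. The same trick in isolation, on a made-up score matrix:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 6)              # 4 boxes, 6 classes, class 0 = background
scores = F.softmax(logits, dim=1)
scores[:, 0] = 0.0                      # background can never win
obj_scores, order = scores[:, 1:].sort(dim=1, descending=True)
obj_preds = order[:, 0] + 1             # +1 restores the original class ids
obj_scores = obj_scores[:, 0]           # score of the winning class per box
print(obj_preds, obj_scores)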
Example #4
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_masks=None,
                gt_classes=None,
                gt_rels=None,
                pred_boxes=None,
                pred_masks=None,
                pred_fmaps=None,
                pred_dists=None):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes:

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
            scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels

            if test:
            prob dists, boxes, img inds, maxscores, classes

        pred_fmaps  N*256*14*14
        pred_boxes  N*4
        pred_masks  N*28*28
        pred_dists  N*85

        """
        #print(pred_fmaps.shape, pred_boxes.shape, pred_masks.shape, pred_dists.shape)

        if self.training:
            im_inds = gt_classes[:, 0]
            rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
            # This is actually relation assignment for sgcls:
            # the GT relations are assigned; the ROIs themselves do not change.
            rois, labels, rel_labels = proposal_assignments_gtbox(
                rois.data, gt_boxes.data, gt_classes.data, gt_rels.data,
                image_offset)
            #boxes = rois[:, 1:]
            pred_boxes = rois[:, 1:]
            pred_masks = gt_masks
            pred_dists = Variable(to_onehot(labels.data, self.num_classes))
        else:
            im_inds = pred_boxes[:, 0].long()
            pred_boxes = pred_boxes[:, 1:]
            labels = gt_classes[:, 1]
            rel_labels = None
            pred_dists = Variable(
                to_onehot(pred_dists.data.long(), self.num_classes))
            rois = torch.cat((im_inds[:, None].float(), pred_boxes), 1)

        result = Result()
        #pred_fmaps = pred_fmaps * self.downsample(pred_masks[:, None, :, :])
        #result.obj_fmap = self.roi_fmap_obj(pred_fmaps.view(len(pred_fmaps), -1))
        result.obj_fmap = self.obj_feature_map(pred_fmaps, rois)
        result.rm_obj_dists = pred_dists
        result.rm_obj_labels = labels
        result.rel_labels = rel_labels
        #result.boxes_all = None
        rel_inds = self.get_rel_inds(result.rel_labels, im_inds, pred_boxes)
        #rois = torch.cat((im_inds[:, None].float(), boxes), 1)

        # result.obj_fmap = self.obj_feature_map(result.fmap, rois)
        # Prevent gradients from flowing back into score_fc from elsewhere
        result.rm_obj_dists, result.obj_preds, edge_ctx = self.context(
            result.obj_fmap, result.rm_obj_dists, im_inds, result.rm_obj_labels
            if self.training or self.mode == 'predcls' else None,
            pred_boxes.data, None)

        if edge_ctx is None:
            edge_rep = self.post_emb(result.obj_preds)
        else:
            edge_rep = self.post_lstm(edge_ctx)

        # Split into subject and object representations
        edge_rep = edge_rep.view(edge_rep.size(0), 2, self.pooling_dim)

        subj_rep = edge_rep[:, 0]
        obj_rep = edge_rep[:, 1]

        prod_rep = subj_rep[rel_inds[:, 1]] * obj_rep[rel_inds[:, 2]]
        vr = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:])
        prod_rep = prod_rep * vr
        # if self.use_vision:
        #     vr = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:])
        #     if self.limit_vision:
        #         # exact value TBD
        #         prod_rep = torch.cat((prod_rep[:, :2048] * vr[:, :2048], prod_rep[:, 2048:]), 1)
        #     else:
        #         prod_rep = prod_rep * vr

        if self.use_tanh:
            prod_rep = F.tanh(prod_rep)

        result.rel_dists = self.rel_compress(prod_rep)

        if self.use_bias:
            result.rel_dists = result.rel_dists + self.freq_bias.index_with_labels(
                torch.stack((
                    result.obj_preds[rel_inds[:, 1]],
                    result.obj_preds[rel_inds[:, 2]],
                ), 1))

        if self.training:
            return result

        twod_inds = arange(
            result.obj_preds.data) * self.num_classes + result.obj_preds.data
        result.obj_scores = F.softmax(result.rm_obj_dists,
                                      dim=1).view(-1)[twod_inds]

        # # Bbox regression
        # if self.mode == 'sgdet':
        #     bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)
        # else:
        #     # Boxes will get fixed by filter_dets function.
        #     bboxes = result.rm_box_priors

        rel_rep = F.softmax(result.rel_dists, dim=1)
        return filter_dets_mask(pred_boxes, pred_masks, result.obj_scores,
                                result.obj_preds, rel_inds[:, 1:], rel_rep)
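
The twod_inds computation near the end indexes a flattened [N, C] softmax with arange(N) * C + pred, so each row contributes the probability of its own predicted class. A minimal reproduction in plain torch (the codebase's arange helper presumably wraps torch.arange on the matching device); torch.gather does the same thing more readably:

import torch
import torch.nn.functional as F

num_classes = 5
dists = torch.randn(3, num_classes)     # 3 boxes, 5 classes
obj_preds = torch.tensor([2, 0, 4])     # predicted class per box

twod_inds = torch.arange(len(obj_preds)) * num_classes + obj_preds
obj_scores = F.softmax(dists, dim=1).view(-1)[twod_inds]

# Equivalent gather-based version:
same = F.softmax(dists, dim=1).gather(1, obj_preds[:, None]).squeeze(1)
assert torch.allclose(obj_scores, same)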