Esempio n. 1
0
    def forward(self, obj_fmaps, obj_logits, vr, rel_inds, obj_labels=None, boxes_per_cls=None):
        """
        Choose a label for every object and score every candidate relation.

        :param obj_fmaps: per-object features, looked up with the object indices in rel_inds
        :param obj_logits: per-object class scores; used unchanged outside 'predcls' mode
        :param vr: per-relation features, vr[i] belongs to the i-th row of rel_inds
        :param rel_inds: relation rows; entries 1 and 2 index the two objects of the pair
        :param obj_labels: known object labels (needed for 'predcls'); when given they
                           are returned as the predictions outside 'sgdet' evaluation
        :param boxes_per_cls: per-class boxes, read only during 'sgdet' evaluation
        :return: (obj_dists2, obj_preds, rel_dists)
        """
        if self.mode == 'predcls':
            # Known labels stand in for the class distribution.
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            obj_dists2 = obj_logits

        run_nms = self.mode == 'sgdet' and not self.training
        if run_nms:
            # Per-class NMS: only boxes that survive for a class may claim it.
            cls_probs = F.softmax(obj_dists2, 1)
            keep_mask = obj_dists2.data.clone()
            keep_mask.zero_()
            num_cls = obj_dists2.size(1)
            for cls_idx in range(1, num_cls):  # column 0 is never a candidate
                cls_scores = cls_probs.data[:, cls_idx]
                cls_boxes = boxes_per_cls.data[:, cls_idx]
                n_boxes = cls_scores.size(0)
                survivors = apply_nms(cls_scores, cls_boxes,
                                      pre_nms_topn=n_boxes, post_nms_topn=n_boxes,
                                      nms_thresh=0.3)
                keep_mask[:, cls_idx][survivors] = 1

            masked_probs = Variable(keep_mask * cls_probs.data, volatile=True)
            # +1 undoes the shift introduced by slicing away column 0.
            obj_preds = masked_probs[:, 1:].max(1)[1] + 1
        elif obj_labels is not None:
            obj_preds = obj_labels
        else:
            obj_preds = obj_dists2[:, 1:].max(1)[1] + 1

        # One feature row per relation: [first object | second object | relation].
        pair_feats = []
        for rel_idx, rel_row in enumerate(rel_inds):
            first_feat = obj_fmaps[rel_row[1]]
            second_feat = obj_fmaps[rel_row[2]]
            pair_feats.append(torch.cat([first_feat, second_feat, vr[rel_idx]]))
        rel_dists = self.vr_fc(torch.stack(pair_feats))

        return obj_dists2, obj_preds, rel_dists
Esempio n. 2
0
    def forward(self, obj_logits, vr, obj_labels=None, boxes_per_cls=None):
        """
        Choose a label for every object and classify the relation features.

        :param obj_logits: per-object class scores; used unchanged outside 'predcls' mode
        :param vr: relation features, passed straight to self.vr_fc
        :param obj_labels: known object labels (needed for 'predcls'); when given they
                           are returned as the predictions outside 'sgdet' evaluation
        :param boxes_per_cls: per-class boxes, read only during 'sgdet' evaluation
        :return: (obj_dists2, obj_preds, rel_dists)
        """
        if self.mode == 'predcls':
            # Known labels stand in for the class distribution.
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_obj_cls))
        else:
            obj_dists2 = obj_logits

        run_nms = self.mode == 'sgdet' and not self.training
        if run_nms:
            # Per-class NMS: only boxes that survive for a class may claim it.
            cls_probs = F.softmax(obj_dists2, 1)
            keep_mask = obj_dists2.data.clone()
            keep_mask.zero_()
            num_cls = obj_dists2.size(1)
            for cls_idx in range(1, num_cls):  # column 0 is never a candidate
                cls_scores = cls_probs.data[:, cls_idx]
                cls_boxes = boxes_per_cls.data[:, cls_idx]
                n_boxes = cls_scores.size(0)
                survivors = apply_nms(cls_scores,
                                      cls_boxes,
                                      pre_nms_topn=n_boxes,
                                      post_nms_topn=n_boxes,
                                      nms_thresh=0.3)
                keep_mask[:, cls_idx][survivors] = 1

            masked_probs = Variable(keep_mask * cls_probs.data, volatile=True)
            # +1 undoes the shift introduced by slicing away column 0.
            obj_preds = masked_probs[:, 1:].max(1)[1] + 1
        elif obj_labels is not None:
            obj_preds = obj_labels
        else:
            obj_preds = obj_dists2[:, 1:].max(1)[1] + 1

        return obj_dists2, obj_preds, self.vr_fc(vr)
Esempio n. 3
0
    def forward(self,
                obj_fmaps,
                obj_logits,
                rel_inds,
                vr,
                obj_labels=None,
                boxes_per_cls=None):
        """
        Choose object labels, then let self.ggnn_rel score the relations.

        :param obj_fmaps: per-object features (projected through self.obj_proj)
        :param obj_logits: per-object class scores; used unchanged outside 'predcls' mode
        :param rel_inds: relation rows; columns 1 and 2 index the two objects of the pair
        :param vr: per-relation features (projected through self.rel_proj)
        :param obj_labels: known object labels (needed for 'predcls'); when given they
                           are returned as the predictions outside 'sgdet' evaluation
        :param boxes_per_cls: per-class boxes, read only during 'sgdet' evaluation
        :return: (obj_dists2, obj_preds, rel_dists)
        """
        if self.mode == 'predcls':
            # Known labels stand in for the class distribution.
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_obj_cls))
        else:
            obj_dists2 = obj_logits

        run_nms = self.mode == 'sgdet' and not self.training
        if run_nms:
            # Per-class NMS: only boxes that survive for a class may claim it.
            cls_probs = F.softmax(obj_dists2, 1)
            keep_mask = obj_dists2.data.clone()
            keep_mask.zero_()
            num_cls = obj_dists2.size(1)
            for cls_idx in range(1, num_cls):  # column 0 is never a candidate
                cls_scores = cls_probs.data[:, cls_idx]
                cls_boxes = boxes_per_cls.data[:, cls_idx]
                n_boxes = cls_scores.size(0)
                survivors = apply_nms(cls_scores,
                                      cls_boxes,
                                      pre_nms_topn=n_boxes,
                                      post_nms_topn=n_boxes,
                                      nms_thresh=0.3)
                keep_mask[:, cls_idx][survivors] = 1

            masked_probs = Variable(keep_mask * cls_probs.data, volatile=True)
            # +1 undoes the shift introduced by slicing away column 0.
            obj_preds = masked_probs[:, 1:].max(1)[1] + 1
        elif obj_labels is not None:
            obj_preds = obj_labels
        else:
            obj_preds = obj_dists2[:, 1:].max(1)[1] + 1

        # Predicted labels of both objects of each relation, one row per relation.
        first_preds = obj_preds[rel_inds[:, 1]].view(-1, 1)
        second_preds = obj_preds[rel_inds[:, 2]].view(-1, 1)
        sub_obj_preds = torch.cat((first_preds, second_preds), 1)

        obj_fmaps = self.obj_proj(obj_fmaps)
        vr = self.rel_proj(vr)

        # Per relation: two object rows followed by num_rel_cls copies of the
        # relation feature.
        per_rel_inputs = []
        for rel_idx, rel_row in enumerate(rel_inds):
            first_feat = obj_fmaps[rel_row[1]].unsqueeze(0)
            second_feat = obj_fmaps[rel_row[2]].unsqueeze(0)
            rel_feat = vr[rel_idx].repeat(self.num_rel_cls, 1)
            per_rel_inputs.append(torch.cat([first_feat, second_feat, rel_feat], 0))
        input_ggnn = torch.stack(per_rel_inputs)

        rel_dists = self.ggnn_rel(rel_inds[:, 1:], sub_obj_preds, input_ggnn)

        return obj_dists2, obj_preds, rel_dists
Esempio n. 4
0
    def forward(self,
                obj_fmaps,
                obj_logits,
                im_inds,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Build a fused class/position representation per object and label it.

        :param obj_fmaps: accepted for interface compatibility; not read here
        :param obj_logits: per-object class scores, softmaxed to weight self.obj_embed
        :param im_inds: accepted for interface compatibility; not read here
        :param obj_labels: known object labels (needed for 'predcls'); when given they
                           are returned as the predictions outside 'sgdet' evaluation
        :param box_priors: boxes handed to center_size before the position embedding
        :param boxes_per_cls: per-class boxes, read only during 'sgdet' evaluation
        :return: (obj_dists2, obj_preds, obj_pre_rep)
        """
        # Class embedding: probability-weighted mix of the embedding rows.
        class_probs = F.softmax(obj_logits, dim=1)
        obj_embed = class_probs @ self.obj_embed.weight

        pos_embed = self.pos_embed(center_size(box_priors))
        fused_input = torch.cat((obj_embed, pos_embed), 1)
        obj_pre_rep = self.conver_fusion_feature(fused_input)

        if self.mode == 'predcls':
            # Known labels stand in for the class distribution.
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            obj_dists2 = self.decoder_lin(obj_pre_rep)

        run_nms = self.mode == 'sgdet' and not self.training
        if run_nms:
            # Per-class NMS: only boxes that survive for a class may claim it.
            cls_probs = F.softmax(obj_dists2, 1)
            keep_mask = obj_dists2.data.clone()
            keep_mask.zero_()
            num_cls = obj_dists2.size(1)
            for cls_idx in range(1, num_cls):  # column 0 is never a candidate
                cls_scores = cls_probs.data[:, cls_idx]
                cls_boxes = boxes_per_cls.data[:, cls_idx]
                n_boxes = cls_scores.size(0)
                survivors = apply_nms(cls_scores,
                                      cls_boxes,
                                      pre_nms_topn=n_boxes,
                                      post_nms_topn=n_boxes,
                                      nms_thresh=0.3)
                keep_mask[:, cls_idx][survivors] = 1

            masked_probs = Variable(keep_mask * cls_probs.data, volatile=True)
            # +1 undoes the shift introduced by slicing away column 0.
            obj_preds = masked_probs[:, 1:].max(1)[1] + 1
        elif obj_labels is not None:
            obj_preds = obj_labels
        else:
            obj_preds = obj_dists2[:, 1:].max(1)[1] + 1

        return obj_dists2, obj_preds, obj_pre_rep
Esempio n. 5
0
def filter_roi_proposals(box_preds, class_preds, boxes_per_im, nms_thresh=0.7, pre_nms_topn=12000, post_nms_topn=2000):
    """
    Run NMS over the proposals and return the survivors as ROIs.

    :param box_preds: proposal boxes; the rows selected by apply_nms are returned
    :param class_preds: proposal scores handed to apply_nms
    :param boxes_per_im: forwarded unchanged to apply_nms
    :param nms_thresh: forwarded unchanged to apply_nms
    :param pre_nms_topn: forwarded unchanged to apply_nms
    :param post_nms_topn: forwarded unchanged to apply_nms
    :return: rois, one row per kept proposal: an image index column followed by
             the columns of the kept box
    """
    inds, im_per = apply_nms(
        class_preds,
        box_preds,
        pre_nms_topn=pre_nms_topn,
        post_nms_topn=post_nms_topn,
        boxes_per_im=boxes_per_im,
        nms_thresh=nms_thresh,
    )
    # im_per[k] is used as the number of rows tagged with image index k
    # (presumably the count of proposals kept for that image).
    img_inds = torch.cat([val * torch.ones(i) for val, i in enumerate(im_per)], 0)
    # Follow box_preds onto its GPU, but stay on the CPU for CPU inputs instead
    # of failing on an unconditional .cuda(get_device()).
    if box_preds.is_cuda:
        img_inds = img_inds.cuda(box_preds.get_device())
    rois = torch.cat((img_inds[:, None], box_preds[inds]), 1)
    return rois
Esempio n. 6
0
    def forward(self, im_inds, obj_fmaps, obj_logits, rel_inds, vr, obj_labels=None, boxes_per_cls=None):
        """
        Run self.ggnn image by image, then choose a label for every object.

        :param im_inds: per-object image index, split into per-image spans
        :param obj_fmaps: per-object features (projected through self.obj_proj)
        :param obj_logits: per-object class scores; replaced by onehot_logits(...) in
                           'predcls' mode and by the ggnn output when it refines classes
        :param rel_inds: relation rows; column 0 is split into per-image spans and
                         columns 1: hold the object indices of each pair
        :param vr: per-relation features (projected through self.rel_proj)
        :param obj_labels: known object labels (needed for 'predcls'); when given they
                           are returned as the predictions outside 'sgdet' evaluation
        :param boxes_per_cls: per-class boxes, read only during 'sgdet' evaluation
        :return: (obj_logits, obj_preds, rel_logits)
        """
        if self.mode == 'predcls':
            obj_logits = Variable(onehot_logits(obj_labels.data, self.num_obj_cls))
        obj_probs = F.softmax(obj_logits, 1)

        obj_fmaps = self.obj_proj(obj_fmaps)
        vr = self.rel_proj(vr)

        rel_chunks = []
        obj_chunks = []
        obj_spans = enumerate_by_image(im_inds.data)
        rel_spans = enumerate_by_image(rel_inds[:, 0])
        for (_, obj_s, obj_e), (_, rel_s, rel_e) in zip(obj_spans, rel_spans):
            # Object indices are shifted so they are local to this image's slice.
            local_pairs = rel_inds[rel_s:rel_e, 1:] - obj_s
            rel_out, obj_out = self.ggnn(local_pairs,
                                         obj_probs[obj_s:obj_e],
                                         obj_fmaps[obj_s:obj_e],
                                         vr[rel_s:rel_e])
            rel_chunks.append(rel_out)
            obj_chunks.append(obj_out)

        rel_logits = torch.cat(rel_chunks, 0)

        if self.ggnn.refine_obj_cls:
            # The ggnn's object output takes over as the object logits.
            obj_logits = torch.cat(obj_chunks, 0)

        obj_probs = F.softmax(obj_logits, 1)
        run_nms = self.mode == 'sgdet' and not self.training
        if run_nms:
            # Per-class NMS: only boxes that survive for a class may claim it.
            keep_mask = obj_probs.data.clone()
            keep_mask.zero_()
            num_cls = obj_probs.size(1)
            for cls_idx in range(1, num_cls):  # column 0 is never a candidate
                cls_scores = obj_probs.data[:, cls_idx]
                cls_boxes = boxes_per_cls.data[:, cls_idx]
                n_boxes = cls_scores.size(0)
                survivors = apply_nms(cls_scores, cls_boxes,
                                      pre_nms_topn=n_boxes, post_nms_topn=n_boxes,
                                      nms_thresh=0.3)
                keep_mask[:, cls_idx][survivors] = 1

            masked_probs = Variable(keep_mask * obj_probs.data, volatile=True)
            # +1 undoes the shift introduced by slicing away column 0.
            obj_preds = masked_probs[:, 1:].max(1)[1] + 1
        elif obj_labels is not None:
            obj_preds = obj_labels
        else:
            obj_preds = obj_probs[:, 1:].max(1)[1] + 1

        return obj_logits, obj_preds, rel_logits
Esempio n. 7
0
def filter_roi_proposals(box_preds,
                         class_preds,
                         boxes_per_im,
                         nms_thresh=0.7,
                         pre_nms_topn=12000,
                         post_nms_topn=2000):
    """
    Run NMS over the proposals and return the survivors as ROIs.

    :param box_preds: proposal boxes; the rows selected by apply_nms are returned
    :param class_preds: proposal scores handed to apply_nms
    :param boxes_per_im: forwarded unchanged to apply_nms
    :param nms_thresh: forwarded unchanged to apply_nms
    :param pre_nms_topn: forwarded unchanged to apply_nms
    :param post_nms_topn: forwarded unchanged to apply_nms
    :return: rois, one row per kept proposal: an image index column followed by
             the columns of the kept box
    """
    inds, im_per = apply_nms(
        class_preds,
        box_preds,
        pre_nms_topn=pre_nms_topn,
        post_nms_topn=post_nms_topn,
        boxes_per_im=boxes_per_im,
        nms_thresh=nms_thresh,
    )
    # im_per[k] is used as the number of rows tagged with image index k
    # (presumably the count of proposals kept for that image).
    img_inds = torch.cat([val * torch.ones(i) for val, i in enumerate(im_per)],
                         0)
    # Follow box_preds onto its GPU, but stay on the CPU for CPU inputs instead
    # of failing on an unconditional .cuda(get_device()).
    if box_preds.is_cuda:
        img_inds = img_inds.cuda(box_preds.get_device())
    rois = torch.cat((img_inds[:, None], box_preds[inds]), 1)
    return rois
Esempio n. 8
0
def filter_det(scores, boxes, start_ind=0, max_per_img=100, thresh=0.001, pre_nms_topn=6000,
               post_nms_topn=300, nms_thresh=0.3, nms_filter_duplicates=True):
    """
    Filters the detections for a single image
    :param scores: [num_rois, num_classes]
    :param boxes: [num_rois, num_classes, 4]. Assumes the boxes have been clamped
    :param start_ind: Offset added to the returned box indices
    :param max_per_img: Max detections per image
    :param thresh: Threshold for calling it a good box
    :param pre_nms_topn: Forwarded to apply_nms
    :param post_nms_topn: Forwarded to apply_nms
    :param nms_thresh: Forwarded to apply_nms
    :param nms_filter_duplicates: True if we shouldn't allow for multiple detections of the
           same box (with different labels)
    :return: None when there is nothing to detect (no boxes, or no class in
             columns 1: scoring above thresh). Otherwise a tuple
             (inds_all, scores_all, labels_all) of at most max_per_img detections
             sorted by descending score: box index (plus start_ind), score and label.
    """
    # Nothing to reduce over; also avoids max() over an empty dimension.
    if scores.numel() == 0:
        return None

    valid_cls = (scores[:, 1:].data.max(0)[0] > thresh).nonzero() + 1
    # Legacy PyTorch returned a 0-dim tensor for "no match"; current versions
    # return shape (0, 1), so the element count has to be checked as well.
    if valid_cls.dim() == 0 or valid_cls.numel() == 0:
        return None

    nms_mask = scores.data.clone()
    nms_mask.zero_()

    # Per-class NMS, restricted to classes that have at least one good score.
    for c_i in valid_cls.squeeze(1).cpu():
        scores_ci = scores.data[:, c_i]
        boxes_ci = boxes.data[:, c_i]

        keep = apply_nms(scores_ci, boxes_ci,
                         pre_nms_topn=pre_nms_topn, post_nms_topn=post_nms_topn,
                         nms_thresh=nms_thresh)
        nms_mask[:, c_i][keep] = 1

    dists_all = Variable(nms_mask * scores.data, volatile=True)

    if nms_filter_duplicates:
        # One detection per box: its best surviving class.
        scores_pre, labels_pre = dists_all.data.max(1)
        inds_all = scores_pre.nonzero()
        assert inds_all.dim() != 0
        inds_all = inds_all.squeeze(1)

        labels_all = labels_pre[inds_all]
        scores_all = scores_pre[inds_all]
    else:
        # Every surviving (box, class) pair is its own detection.
        nz = nms_mask.nonzero()
        assert nz.dim() != 0
        inds_all = nz[:, 0]
        labels_all = nz[:, 1]
        # Flat index into the row-major score matrix.
        scores_all = scores.data.view(-1)[inds_all * scores.data.size(1) + labels_all]

    # Limit to max per image detections, best first, dropping weak scores.
    vs, idx = torch.sort(scores_all, dim=0, descending=True)
    idx = idx[vs > thresh]
    if max_per_img < idx.size(0):
        idx = idx[:max_per_img]

    inds_all = inds_all[idx] + start_ind
    scores_all = Variable(scores_all[idx], volatile=True)
    labels_all = Variable(labels_all[idx], volatile=True)

    return inds_all, scores_all, labels_all
Esempio n. 9
0
    def forward(self,
                obj_fmaps,
                obj_logits,
                im_inds,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Compute object scores/labels and, optionally, an edge context.

        Shapes quoted below are the original author's notes and are not
        checked by this method.

        :param obj_fmaps: per-box ROI features (author's note: 4096-dim)
        :param obj_logits: per-box detector scores before softmax
                           (author's note: [#boxes, 151], detached)
        :param im_inds: image index per box, forwarded to the context modules
        :param obj_labels: known object labels, if available
        :param box_priors: boxes handed to center_size before the position embedding
        :param boxes_per_cls: per-class boxes; forwarded to self.obj_ctx, or read
                              directly during 'sgdet' evaluation
        :return: (obj_dists2, obj_preds, edge_ctx); edge_ctx is None when
                 self.nl_edge is not positive
        """
        # Class embedding: probability-weighted mix of the embedding rows.
        class_probs = F.softmax(obj_logits, dim=1)
        obj_embed = class_probs @ self.obj_embed.weight
        # Author's note: center_size yields (center_x, center_y, width, height).
        box_geometry = Variable(center_size(box_priors))
        pos_embed = self.pos_embed(box_geometry)
        obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

        if self.nl_obj > 0:
            # The object context module does the scoring and labelling.
            obj_dists2, obj_preds, obj_ctx = self.obj_ctx(
                obj_pre_rep,
                obj_logits,
                im_inds,
                obj_labels,
                box_priors,
                boxes_per_cls,
            )
        else:
            # No object context layers: the fused representation is the context.
            obj_ctx = obj_pre_rep

            if self.mode == 'predcls':
                # Known labels stand in for the class distribution.
                obj_dists2 = Variable(
                    to_onehot(obj_labels.data, self.num_classes))
            else:
                obj_dists2 = self.decoder_lin(obj_pre_rep)

            run_nms = self.mode == 'sgdet' and not self.training
            if run_nms:
                # Per-class NMS: only boxes that survive for a class may claim it.
                cls_probs = F.softmax(obj_dists2, 1)
                keep_mask = obj_dists2.data.clone()
                keep_mask.zero_()
                num_cls = obj_dists2.size(1)
                for cls_idx in range(1, num_cls):  # column 0 is never a candidate
                    cls_scores = cls_probs.data[:, cls_idx]
                    cls_boxes = boxes_per_cls.data[:, cls_idx]
                    n_boxes = cls_scores.size(0)
                    survivors = apply_nms(cls_scores,
                                          cls_boxes,
                                          pre_nms_topn=n_boxes,
                                          post_nms_topn=n_boxes,
                                          nms_thresh=0.3)
                    keep_mask[:, cls_idx][survivors] = 1

                masked_probs = Variable(keep_mask * cls_probs.data,
                                        volatile=True)
                # +1 undoes the shift introduced by slicing away column 0.
                obj_preds = masked_probs[:, 1:].max(1)[1] + 1
            elif obj_labels is not None:
                obj_preds = obj_labels
            else:
                obj_preds = obj_dists2[:, 1:].max(1)[1] + 1

        if self.nl_edge > 0:
            if self.pass_in_obj_feats_to_edge:
                edge_input = torch.cat((obj_fmaps, obj_ctx), 1)
            else:
                edge_input = obj_ctx
            edge_ctx = self.edge_ctx(
                edge_input,
                obj_dists=obj_dists2.detach(),  # scores are passed without gradient
                im_inds=im_inds,
                obj_preds=obj_preds,
                box_priors=box_priors,
            )
        else:
            edge_ctx = None

        return obj_dists2, obj_preds, edge_ctx
Esempio n. 10
0
    def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None, batch_size=None,
                rois=None, od_box_deltas=None, im_sizes=None, image_offset=None, gt_classes=None, gt_boxes=None, ):
        """
        Re-score objects with the decoder stack and choose a label for each.

        :param obj_fmaps: per-box features, concatenated into the decoder input
        :param obj_logits: per-box class scores, softmaxed to weight self.obj_embed
        :param im_inds: image index per box; replaced by the NMS result when NMS runs
        :param obj_labels: known object labels (needed for 'predcls'); outside
                           'sgdet' they are returned as the predictions when given
        :param box_priors: boxes for the position embedding; replaced by the NMS
                           boxes when NMS runs
        :param boxes_per_cls: accepted for interface compatibility; not read here
        :param batch_size: forwarded to self.decoder_lin2 on the detection path
        :param rois: forwarded to self.nms_boxes and filtered by its result
        :param od_box_deltas: forwarded (detached copy) to self.nms_boxes
        :param im_sizes: forwarded to self.nms_boxes
        :param image_offset: added to the image indices returned by self.nms_boxes
        :param gt_classes: ground-truth rows, column 0 image index and column 1
                           label, used to assign labels while training
        :param gt_boxes: ground-truth boxes matched against the NMS boxes by overlap
        :return: in 'sgdet' mode (obj_dists2, obj_preds, im_inds, box_priors,
                 rm_obj_labels, rois, nms_boxes); otherwise (obj_dists2, obj_preds)
        """
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
        pos_embed = self.pos_embed(Variable(center_size(box_priors)))
        obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

        if self.mode == 'predcls':
            # Known labels stand in for the class distribution.
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        elif self.mode == 'sgcls':
            obj_dists2 = self.decoder_lin1(obj_pre_rep)
            # All boxes go through decoder_lin2 as one sequence with batch size 1.
            obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, 1, 1024), 1)
            obj_dists2 = obj_dists2[1]
            obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
        else:
            # this is for sgdet
            obj_dists2 = self.decoder_lin1(obj_pre_rep)

            # Reorder the boxes, pad them to a (time, batch, feature) block for
            # decoder_lin2, then undo the padding and the reordering.
            perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data, None, box_priors)
            obj_dists2 = obj_dists2[perm].contiguous()
            obj_dists2 = PackedSequence(obj_dists2, torch.tensor(ls_transposed))
            obj_dists2, lengths1 = pad_packed_sequence(obj_dists2, batch_first=False)

            obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, batch_size, 1024), batch_size)[1]

            # Read .data instead of tuple-unpacking: PackedSequence has more
            # than two fields on newer PyTorch, which breaks `x, _ = ...`.
            obj_dists2 = pack_padded_sequence(obj_dists2, lengths1, batch_first=False).data
            obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
            obj_dists2 = obj_dists2[inv_perm]

            if (not self.training and not self.mode == 'gtbox') or self.mode in ('sgdet', 'refinerels'):
                # try: dont apply nms here, but after own obj_classifier
                nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                    obj_dists2.clone().detach(),
                    rois,
                    od_box_deltas.clone().detach(), im_sizes,
                )
                # Keep only what survived NMS.
                im_inds = nms_imgs + image_offset
                obj_dists2 = obj_dists2[nms_inds]
                box_priors = nms_boxes[:, 0]
                rois = rois[nms_inds]

                if self.training and not self.mode == 'gtbox':
                    # NOTE: If we're doing this during training, we need to assign labels here.
                    pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                    # Overlaps with ground truth from another image do not count.
                    pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

                    max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                    rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                    # Boxes without a good enough match get label 0.
                    rm_obj_labels[max_overlaps < 0.5] = 0
                else:
                    rm_obj_labels = None

        if self.mode == 'sgdet' and not self.training:  # have tried in training
            # Per-class NMS: only boxes that survive for a class may claim it.
            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = nms_boxes.data[:, c_i]

                keep = apply_nms(scores_ci, boxes_ci,
                                 pre_nms_topn=scores_ci.size(0), post_nms_topn=scores_ci.size(0),
                                 nms_thresh=0.5)  # nms_thresh= 0.3 default
                nms_mask[:, c_i][keep] = 1

            # +1 undoes the shift introduced by slicing away column 0.
            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1  # this for sgdet test
        elif self.mode == 'sgdet':
            # Prefer the labels assigned from ground truth, else the best class.
            obj_preds = rm_obj_labels if rm_obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        if self.mode == 'sgdet':
            return obj_dists2, obj_preds, im_inds, box_priors, rm_obj_labels, rois, nms_boxes
        else:
            return obj_dists2, obj_preds