Example #1
    def forward(self, obj_logits, vr, obj_labels=None, boxes_per_cls=None):
        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_obj_cls))
        else:
            obj_dists2 = obj_logits

        if self.mode == 'sgdet' and not self.training:
            # NMS here for baseline
            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = boxes_per_cls.data[:, c_i]

                keep = apply_nms(scores_ci,
                                 boxes_ci,
                                 pre_nms_topn=scores_ci.size(0),
                                 post_nms_topn=scores_ci.size(0),
                                 nms_thresh=0.3)
                nms_mask[:, c_i][keep] = 1

            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        rel_dists = self.vr_fc(vr)

        return obj_dists2, obj_preds, rel_dists
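
A minimal sketch of the to_onehot helper that the predcls branch above relies on; the helper itself is not shown on this page, so the signature and the fill magnitude here are assumptions rather than the original project's definition.

import torch

def to_onehot_sketch(labels, num_classes, fill=1000.0):
    # labels: LongTensor [num_obj]; returns FloatTensor [num_obj, num_classes].
    # Every score is strongly negative except the ground-truth class, so a later
    # softmax treats the GT label as (near-)certain.
    out = labels.new_full((labels.size(0), num_classes), -fill, dtype=torch.float)
    out.scatter_(1, labels.view(-1, 1), fill)
    return out
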
Example #2
    def forward(self, obj_fmaps, obj_logits, vr, rel_inds, obj_labels=None, boxes_per_cls=None):
        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            obj_dists2 = obj_logits

        if self.mode == 'sgdet' and not self.training:
            # NMS here for baseline
            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = boxes_per_cls.data[:, c_i]

                keep = apply_nms(scores_ci, boxes_ci,
                                    pre_nms_topn=scores_ci.size(0), post_nms_topn=scores_ci.size(0),
                                    nms_thresh=0.3)
                nms_mask[:, c_i][keep] = 1

            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:,1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:,1:].max(1)[1] + 1

        f_obj_rel = torch.stack([torch.cat([obj_fmaps[rel_ind[1]], 
                                                 obj_fmaps[rel_ind[2]], 
                                                 vr[index]])
                                     for index, rel_ind in enumerate(rel_inds)])
        rel_dists = self.vr_fc(f_obj_rel)

        return obj_dists2, obj_preds, rel_dists       
    def obj_ctx(self, obj_feats, obj_labels=None, box_priors=None, boxes_per_cls=None, forest = None, batch_size=0):
        """
        Object context and object classification.
        :param obj_feats: [num_obj, img_dim + object embedding0 dim]
        :param obj_dists: [num_obj, #classes]
        :param im_inds: [num_obj] the indices of the images
        :param obj_labels: [num_obj] the GT labels of the image
        :param boxes: [num_obj, 4] boxes. We'll use this for NMS
        :return: obj_dists: [num_obj, #classes] new probability distribution.
                 obj_preds: argmax of that distribution.
                 obj_final_ctx: [num_obj, #feats] For later!
        """
        # use bidirectional tree lstm to update
        encoder_rep = self.obj_tree_lstm(forest, obj_feats, box_priors.shape[0])

        # Decode in order
        if self.mode != 'predcls':
            decode_feature = torch.cat((obj_feats, encoder_rep), 1) if self.pass_in_obj_feats_to_decoder else encoder_rep
            obj_dists, obj_preds = self.decoder_tree_lstm(forest, decode_feature, 
                                         box_priors.shape[0], 
                                         labels=obj_labels if obj_labels is not None else None, 
                                         boxes_for_nms=boxes_per_cls if boxes_per_cls is not None else None,
                                         batch_size=batch_size)
        else:
            assert obj_labels is not None
            obj_preds = obj_labels
            obj_dists = Variable(to_onehot(obj_preds.data[:-batch_size], self.num_classes))

        return obj_dists, obj_preds, encoder_rep
Example #4
    def forward(self,
                obj_fmaps,
                obj_logits,
                rel_inds,
                vr,
                obj_labels=None,
                boxes_per_cls=None):
        """
        Reason about relationship classes using knowledge of object and relationship co-occurrence.
        """

        # print(rel_inds.shape)
        # (num_rel, 3)
        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_obj_cls))
        else:
            obj_dists2 = obj_logits

        if self.mode == 'sgdet' and not self.training:
            # NMS here for baseline
            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = boxes_per_cls.data[:, c_i]

                keep = apply_nms(scores_ci,
                                 boxes_ci,
                                 pre_nms_topn=scores_ci.size(0),
                                 post_nms_topn=scores_ci.size(0),
                                 nms_thresh=0.3)
                nms_mask[:, c_i][keep] = 1

            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        sub_obj_preds = torch.cat((obj_preds[rel_inds[:, 1]].view(-1, 1),
                                   obj_preds[rel_inds[:, 2]].view(-1, 1)), 1)

        obj_fmaps = self.obj_proj(obj_fmaps)
        vr = self.rel_proj(vr)
        input_ggnn = torch.stack([
            torch.cat([
                obj_fmaps[rel_ind[1]].unsqueeze(0),
                obj_fmaps[rel_ind[2]].unsqueeze(0),
                vr[index].repeat(self.num_rel_cls, 1)
            ], 0) for index, rel_ind in enumerate(rel_inds)
        ])

        rel_dists = self.ggnn_rel(rel_inds[:, 1:], sub_obj_preds, input_ggnn)

        return obj_dists2, obj_preds, rel_dists
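
In Example #4, each row of rel_inds is assumed to hold (image_index, subject_box_index, object_box_index), so columns 1 and 2 index directly into the per-box predictions. A toy illustration of that indexing (the tensors are made up for the example):

import torch

obj_preds = torch.tensor([7, 3, 12, 5])      # predicted class per box
rel_inds = torch.tensor([[0, 0, 2],          # (img, subject box, object box)
                         [0, 1, 3]])

sub_obj_preds = torch.cat((obj_preds[rel_inds[:, 1]].view(-1, 1),
                           obj_preds[rel_inds[:, 2]].view(-1, 1)), 1)
print(sub_obj_preds)   # tensor([[ 7, 12], [ 3,  5]])
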
Example #5
    def obj_ctx(self,
                obj_feats,
                obj_dists,
                im_inds,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Object context and object classification.
        Args:
            obj_feats: Variable, Object features
                with shape of (NumOfRoIs, feature_dim + word_embedding_dim + pos_embedding_dim)
            obj_dists: object class score, with shape of (num_obj, #classes)
            im_inds: the indices of the images, with shape of (NumOfRoI,)
            obj_labels: [num_obj] the GT labels of the image
            box_priors: box coordinates of objects, with shape of (NumOfRoI, 4)
            boxes_per_cls:
        Returns:
            obj_dists: [num_obj, #classes] new probability distribution.
            obj_preds: argmax of that distribution, with shape of (NumOfRoI,); each entry is the predicted class index
            encoder_rep: torch.Tensor, encoder feature with shape of (NumOfRoI, biLSTM_dim(512))
        """
        # Sort by the confidence of the maximum detection.
        confidence = F.softmax(obj_dists, dim=1).data[:, 1:].max(1)[0]
        perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data,
                                                       confidence, box_priors)
        # Pass object features, sorted by score, into the encoder LSTM
        obj_inp_rep = obj_feats[perm].contiguous()
        # embed(header='rel_model perm')
        input_packed = PackedSequence(obj_inp_rep, ls_transposed)

        encoder_rep = self.obj_ctx_rnn(input_packed)[0][0]
        # Decode in order
        if self.mode != 'predcls':
            decoder_inp = PackedSequence(
                torch.cat((obj_inp_rep, encoder_rep), 1)
                if self.pass_in_obj_feats_to_decoder else encoder_rep,
                ls_transposed)
            obj_dists, obj_preds = self.decoder_rnn(
                decoder_inp,
                labels=obj_labels[perm] if obj_labels is not None else None,
                boxes_for_nms=boxes_per_cls[perm]
                if boxes_per_cls is not None else None,
            )
            obj_preds = obj_preds[inv_perm]
            obj_dists = obj_dists[inv_perm]
        else:
            assert obj_labels is not None
            obj_preds = obj_labels
            obj_dists = Variable(to_onehot(obj_preds.data, self.num_classes))
        encoder_rep = encoder_rep[inv_perm]

        # embed(header='rel_model.py obj_ctx before return')
        return obj_dists, obj_preds, encoder_rep
    def obj_ctx(self, obj_feats, obj_dists, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None):
        """
        Object context and object classification.
        :param obj_feats: obj_pre_rep, [num_obj, 4096+200+128]
        :param obj_dists: result.rm_obj_dists.detach(), [num_obj, 151]
        :param im_inds: [num_obj] the indices of the images
        :param obj_labels: od_obj_labels, [num_obj] the GT labels of the image 
        :param boxes: [num_obj, 4] boxes. We'll use this for NMS
        :return: obj_dists: [num_obj, #classes] new probability distribution.
                 obj_preds: argmax of that distribution.
                 obj_final_ctx: [num_obj, #feats] For later!
        """
        #ipdb.set_trace()
        # Encode in order
        # Sort by the confidence of the maximum detection; 
        # shapes: [384,151] -> softmax [384,151] -> drop background [384,150] -> max over classes
        # gives (scores, index); the index runs over the 150 foreground classes while the full label space is 151-d
        confidence = F.softmax(obj_dists, dim=1).data[:, 1:].max(1)[0]
        # sort the RoIs (boxes) within the !same! image by that confidence;
        # a = a[perm][inv_perm]
        # perm: [384]; inv_perm: [384]; ls_transposed: len=64 (6,...6,6)
        perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data, confidence, box_priors)
        # Pass object features, sorted within the same img by score
        obj_inp_rep = obj_feats[perm].contiguous()  # make cache/memory contiguous
        # [6, 64, 151], (batch_size, num_timesteps, input_size)
        input_packed = PackedSequence(obj_inp_rep, ls_transposed)
        # encoder_rep: [#boxes, 512]
        encoder_rep = self.obj_ctx_rnn(input_packed)[0][0]
        
        #ipdb.set_trace()

        # Decode in order
        if self.mode != 'predcls':
            decoder_inp = PackedSequence(torch.cat((obj_inp_rep, encoder_rep), 1) if self.pass_in_obj_feats_to_decoder else encoder_rep,
                                         ls_transposed)
            # when training sgdet: obj_preds = F.softmax(obj_dists, dim=1).data[:, 1:].max(1)[1] + 1
            obj_dists, obj_preds = self.decoder_rnn(
                decoder_inp, #obj_dists[perm],
                labels=obj_labels[perm] if obj_labels is not None else None,
                boxes_for_nms=boxes_per_cls[perm] if boxes_per_cls is not None else None,  # not None when sgdet
                )
            obj_preds = obj_preds[inv_perm]
            obj_dists = obj_dists[inv_perm]
        else:
            assert obj_labels is not None
            obj_preds = obj_labels
            obj_dists = Variable(to_onehot(obj_preds.data, self.num_classes))


        #obj_preds = Variable(F.softmax(obj_dists, dim=1).data[:, 1:].max(1)[1] + 1)
        encoder_rep = encoder_rep[inv_perm]

        return obj_dists, obj_preds, encoder_rep
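
The perm / inv_perm pair returned by sort_rois in the obj_ctx implementations above is a permutation and its inverse, so features can be reordered for the encoder LSTM and restored afterwards, exactly as the "a = a[perm][inv_perm]" comment notes. A minimal sketch of that round trip, using a plain confidence sort in place of sort_rois:

import torch

confidence = torch.tensor([0.2, 0.9, 0.5, 0.7])
perm = confidence.argsort(descending=True)   # order in which boxes are fed to the LSTM
inv_perm = perm.argsort()                    # undoes that ordering afterwards

feats = torch.randn(4, 8)
restored = feats[perm][inv_perm]
assert torch.equal(restored, feats)          # a == a[perm][inv_perm]
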
Example #7
    def forward(self,
                obj_fmaps,
                obj_logits,
                im_inds,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Forward pass through the object and edge context
        :param obj_priors:
        :param obj_fmaps:
        :param im_inds:
        :param obj_labels:
        :param boxes:
        :return:
        """

        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight

        pos_embed = self.pos_embed(center_size(box_priors))
        # obj_pre_rep = self.conver_fusion_feature(torch.cat((obj_fmaps, obj_embed, pos_embed), 1))
        obj_pre_rep = self.conver_fusion_feature(
            torch.cat((obj_embed, pos_embed), 1))
        # UNSURE WHAT TO DO HERE
        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            obj_dists2 = self.decoder_lin(obj_pre_rep)

        if self.mode == 'sgdet' and not self.training:
            # NMS here for baseline
            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = boxes_per_cls.data[:, c_i]

                keep = apply_nms(scores_ci,
                                 boxes_ci,
                                 pre_nms_topn=scores_ci.size(0),
                                 post_nms_topn=scores_ci.size(0),
                                 nms_thresh=0.3)
                nms_mask[:, c_i][keep] = 1

            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        return obj_dists2, obj_preds, obj_pre_rep
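
The positional embedding in Example #7 is computed from center_size(box_priors). Judging from the comment in Example #16 further down ("center_size returns boxes as (center_x, center_y, width, height)"), the helper converts corner-format boxes into center format; a minimal sketch under that assumption:

import torch

def center_size_sketch(boxes):
    # boxes: [num_obj, 4] in (x1, y1, x2, y2) corner format
    wh = boxes[:, 2:] - boxes[:, :2]      # width, height
    ctr = boxes[:, :2] + 0.5 * wh         # center_x, center_y
    return torch.cat((ctr, wh), 1)        # (cx, cy, w, h)
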
Example #8
    def obj_ctx(self,
                obj_feats,
                obj_dists,
                im_inds,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Object context and object classification.
        :param obj_feats: [num_obj, img_dim + object embedding0 dim]
        :param obj_dists: [num_obj, #classes]
        :param im_inds: [num_obj] the indices of the images
        :param obj_labels: [num_obj] the GT labels of the image
        :param boxes: [num_obj, 4] boxes. We'll use this for NMS
        :return: obj_dists: [num_obj, #classes] new probability distribution.
                 obj_preds: argmax of that distribution.
                 obj_final_ctx: [num_obj, #feats] For later!
        """
        # Sort by the confidence of the maximum detection.
        confidence = F.softmax(obj_dists, dim=1).data[:, 1:].max(1)[0]
        perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data,
                                                       confidence, box_priors)
        # Pass object features, sorted by score, into the encoder LSTM
        obj_inp_rep = obj_feats[perm].contiguous()
        input_packed = PackedSequence(obj_inp_rep, ls_transposed)

        encoder_rep = self.obj_ctx_rnn(input_packed)[0][0]
        # Decode in order
        if self.mode != 'predcls':
            decoder_inp = PackedSequence(
                torch.cat((obj_inp_rep, encoder_rep), 1)
                if self.pass_in_obj_feats_to_decoder else encoder_rep,
                ls_transposed)
            obj_dists, obj_preds = self.decoder_rnn(
                decoder_inp,  #obj_dists[perm],
                labels=obj_labels[perm] if obj_labels is not None else None,
                boxes_for_nms=boxes_per_cls[perm]
                if boxes_per_cls is not None else None,
            )
            obj_preds = obj_preds[inv_perm]
            obj_dists = obj_dists[inv_perm]
        else:
            assert obj_labels is not None
            obj_preds = obj_labels
            obj_dists = Variable(to_onehot(obj_preds.data, self.num_classes))
        encoder_rep = encoder_rep[inv_perm]

        return obj_dists, obj_preds, encoder_rep
Example #9
    def forward(self,
                obj_dists1,
                obj_feats,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Forward pass through the object and edge context
        :param obj_priors:
        :param obj_fmaps:
        :param im_inds:
        :param obj_labels:
        :param boxes:
        :return:
        """

        # UNSURE WHAT TO DO HERE
        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            obj_dists2 = self.decoder_lin(obj_feats) + obj_dists1

        if self.mode == 'sgdet' and not self.training:
            # NMS here for baseline

            is_overlap = nms_overlaps(boxes_per_cls.data).view(
                boxes_per_cls.size(0), boxes_per_cls.size(0),
                boxes_per_cls.size(1)).cpu().numpy() >= 0.5

            probs = F.softmax(obj_dists2, 1).data.cpu().numpy()
            probs[:, 0] = 0
            obj_preds = obj_dists2.data.new(obj_dists2.shape[0]).long().fill_(0)

            for i in range(obj_preds.size(0)):
                box_ind, cls_ind = np.unravel_index(probs.argmax(),
                                                    probs.shape)
                obj_preds[int(box_ind)] = int(cls_ind)
                probs[is_overlap[box_ind, :, cls_ind], cls_ind] = 0.0
                probs[box_ind] = -1.0

            obj_preds = Variable(obj_preds.view(-1))
        else:
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        return obj_dists2, obj_preds
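
The greedy loop in Example #9 repeatedly picks the globally highest-scoring (box, class) pair and then zeroes out that class for every box that overlaps the chosen one too strongly. np.unravel_index(probs.argmax(), probs.shape) is what recovers the 2-D (box, class) coordinates from the flat argmax; a short worked example:

import numpy as np

probs = np.array([[0.1, 0.7, 0.2],
                  [0.3, 0.4, 0.9]])
box_ind, cls_ind = np.unravel_index(probs.argmax(), probs.shape)
print(box_ind, cls_ind)   # 1 2 -> box 1, class 2 holds the highest score
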
Example #10
    def forward(self, im_inds, obj_fmaps, obj_labels):
        """
        Reason object classes using knowledge of object co-occurrence
        """

        if self.mode == 'predcls':
            # in task 'predcls', there is no need to run GGNN_obj
            obj_dists = Variable(to_onehot(obj_labels.data, self.num_obj_cls))
            return obj_dists
        else:
            input_ggnn = self.obj_proj(obj_fmaps)

            lengths = []
            for i, s, e in enumerate_by_image(im_inds.data):
                lengths.append(e - s)
            obj_cum_add = np.cumsum([0] + lengths)
            obj_dists = torch.cat([self.ggnn_obj(input_ggnn[obj_cum_add[i] : obj_cum_add[i+1]]) for i in range(len(lengths))], 0)
            return obj_dists
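
Example #10 slices the objects image by image before running the GGNN. enumerate_by_image is assumed to yield (image_index, start, end) spans over an im_inds vector that is already sorted by image; a minimal sketch under that assumption:

import torch

def enumerate_by_image_sketch(im_inds):
    # im_inds: LongTensor [num_obj], sorted by image, e.g. [0, 0, 0, 1, 1, 2]
    start = 0
    for i in range(1, im_inds.size(0) + 1):
        if i == im_inds.size(0) or im_inds[i] != im_inds[start]:
            yield int(im_inds[start]), start, i
            start = i

lengths = [e - s for _, s, e in enumerate_by_image_sketch(torch.tensor([0, 0, 0, 1, 1, 2]))]
print(lengths)   # [3, 2, 1] objects in images 0, 1, 2
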
Example #11
    def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None):
        """
        Forward pass through the object and edge context
        :param obj_priors:
        :param obj_fmaps:
        :param im_inds:
        :param obj_labels:
        :param boxes:
        :return:
        """
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
        pos_embed = self.pos_embed(Variable(center_size(box_priors)))
        obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

        if self.nl_obj > 0:
            obj_dists2, obj_preds, obj_ctx = self.obj_ctx(
                obj_pre_rep,
                obj_logits,
                im_inds,
                obj_labels,
                box_priors,
                boxes_per_cls,
            )
        else:
            # UNSURE WHAT TO DO HERE
            if self.mode == 'predcls':
                obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
            else:
                obj_dists2 = self.decoder_lin(obj_pre_rep)
            obj_preds = obj_labels if obj_labels is not None else obj_dists2[:,1:].max(1)[1] + 1
            obj_ctx = obj_pre_rep

        edge_ctx = None
        if self.nl_edge > 0:
            edge_ctx = self.edge_ctx(
                torch.cat((obj_fmaps, obj_ctx), 1) if self.pass_in_obj_feats_to_edge else obj_ctx,
                obj_dists=obj_dists2.detach(),  # Was previously obj_logits.
                im_inds=im_inds,
                obj_preds=obj_preds,
                box_priors=box_priors,
            )

        return obj_dists2, obj_preds, edge_ctx
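
The line obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight in Example #11 (and in several other examples on this page) mixes the class word embeddings by the detector's predicted class probabilities instead of committing to a hard argmax. A small self-contained sketch of that soft lookup; the sizes are illustrative, not the project's:

import torch
import torch.nn as nn
import torch.nn.functional as F

num_classes, embed_dim, num_obj = 151, 200, 4
obj_embed = nn.Embedding(num_classes, embed_dim)
obj_logits = torch.randn(num_obj, num_classes)

# weighted average of all class embeddings, weighted by predicted class probability
soft_embed = F.softmax(obj_logits, dim=1) @ obj_embed.weight   # [num_obj, embed_dim]

# hard lookup for comparison: only the argmax class contributes
hard_embed = obj_embed(obj_logits.argmax(dim=1))               # [num_obj, embed_dim]
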
    def obj_ctx(self, obj_feats, obj_labels=None, boxes_for_nms=None):
        """
        Object context and object classification.
        :param obj_feats: [num_obj, obj_dim + embed_dim + ctx_dim]: O0
        :param obj_labels: [num_obj] the GT labels of the image
        :param boxes_for_nms: [num_obj, 4] boxes. We'll use this for NMS
        :return: obj_dists: [num_obj, num_classes] new probability distribution: O4
                 obj_preds: [num_obj] argmax of that distribution: O4'
                 obj_ctx: [num_obj, hidden_dim] for later edge contex: O3
        """
        O1, O2 = self.RE1(obj_feats)
        obj_ctx, obj_dists = self.RE2(O2)

        if self.mode != 'predcls':
            obj_preds = self.get_max_preds(obj_dists, obj_labels,
                                           boxes_for_nms)
        else:
            assert obj_labels is not None
            obj_preds = obj_labels
            obj_dists = Variable(to_onehot(obj_preds.data, self.num_classes))

        return obj_dists, obj_preds, obj_ctx
Example #13
    def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None, batch_size=None,
                rois=None, od_box_deltas=None, im_sizes=None, image_offset=None, gt_classes=None, gt_boxes=None, ):
        """
        Forward pass through the object and edge context
        :param obj_priors:
        :param obj_fmaps:
        :param im_inds:
        :param obj_labels:
        :param boxes:
        :return:
        """
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
        pos_embed = self.pos_embed(Variable(center_size(box_priors)))
        obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)


        if self.mode == 'predcls':
            obj_dists2 = Variable(to_onehot(obj_labels.data, self.num_classes))
        else:
            if self.mode == 'sgcls':

                obj_dists2 = self.decoder_lin1(obj_pre_rep)
                obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, 1, 1024), 1)

                obj_dists2 = obj_dists2[1]

                obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))

            else:
                # this is for sgdet

                obj_dists2 = self.decoder_lin1(obj_pre_rep)

                perm, inv_perm, ls_transposed = self.sort_rois(im_inds.data, None, box_priors)
                obj_dists2 = obj_dists2[perm].contiguous()
                obj_dists2 = PackedSequence(obj_dists2, torch.tensor(ls_transposed))
                obj_dists2, lengths1 = pad_packed_sequence(obj_dists2, batch_first=False)


                obj_dists2 = self.decoder_lin2(obj_dists2.view(-1, batch_size, 1024), batch_size)[1]


                obj_dists2, _ = pack_padded_sequence(obj_dists2, lengths1, batch_first=False)
                obj_dists2 = self.decoder_lin3(obj_dists2.view(-1, 1024))
                obj_dists2 = obj_dists2[inv_perm]


                if (not self.training and not self.mode == 'gtbox') or self.mode in ('sgdet', 'refinerels'):
                    # try: don't apply NMS here, but after our own obj_classifier
                    nms_inds, nms_scores, nms_preds, nms_boxes_assign, nms_boxes, nms_imgs = self.nms_boxes(
                        obj_dists2.clone().detach(),
                        rois,
                        od_box_deltas.clone().detach(), im_sizes,
                    )
                    im_inds = nms_imgs + image_offset
                    obj_dists2 = obj_dists2[nms_inds]
                    obj_fmap = obj_fmaps[nms_inds]
                    box_deltas = od_box_deltas[nms_inds]
                    box_priors = nms_boxes[:, 0]
                    rois = rois[nms_inds]

                    if self.training and not self.mode == 'gtbox':
                        # NOTE: If we're doing this during training, we need to assign labels here.
                        pred_to_gtbox = bbox_overlaps(box_priors, gt_boxes).data
                        pred_to_gtbox[im_inds.data[:, None] != gt_classes.data[None, :, 0]] = 0.0

                        max_overlaps, argmax_overlaps = pred_to_gtbox.max(1)
                        rm_obj_labels = gt_classes[:, 1][argmax_overlaps]
                        rm_obj_labels[max_overlaps < 0.5] = 0
                    else:
                        rm_obj_labels = None

        if self.mode == 'sgdet' and not self.training:  # have tried in training
            # NMS here for baseline

            probs = F.softmax(obj_dists2, 1)
            nms_mask = obj_dists2.data.clone()
            nms_mask.zero_()
            for c_i in range(1, obj_dists2.size(1)):
                scores_ci = probs.data[:, c_i]
                boxes_ci = nms_boxes.data[:, c_i]

                keep = apply_nms(scores_ci, boxes_ci,
                                 pre_nms_topn=scores_ci.size(0), post_nms_topn=scores_ci.size(0),
                                 nms_thresh=0.5)#nms_thresh= 0.3 default
                nms_mask[:, c_i][keep] = 1

            obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1  # this for sgdet test

            #obj_preds=obj_dists2[:,1:].max(1)[1] + 1
        else:
            if self.mode == 'sgdet':
                # use gt
                obj_preds = rm_obj_labels if rm_obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1
                # use_predicted label
                # obj_preds = obj_dists2[:, 1:].max(1)[1] + 1
            else:
                obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1

        if self.mode == 'sgdet':
            return obj_dists2, obj_preds, im_inds, box_priors, rm_obj_labels, rois, nms_boxes
        else:
            return obj_dists2, obj_preds
    def forward(self, obj_fmaps, obj_logits, im_inds, obj_labels=None, box_priors=None, boxes_per_cls=None, gt_forest=None, image_rois=None, image_fmap=None, co_occour=None, rel_labels=None, origin_img=None):
        """
        Forward pass through the object and edge context
        :param obj_priors: [obj_num, (x1,y1,x2,y2)], float cuda
        :param obj_fmaps:
        :param im_inds: [obj_num] long variable
        :param obj_labels:
        :param boxes:
        :return:
        """
        if self.mode == 'predcls':
            obj_logits = Variable(to_onehot(obj_labels.data, self.num_classes))
            
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
        
        batch_size = image_rois.shape[0]
        # pseudo box and image index: to encode virtual node into original inputs
        pseudo_box_priors = torch.cat((box_priors, image_rois[:, 1:].contiguous().data), 0)  # [obj_num + batch_size, 4]
        pseudo_im_inds = torch.cat((im_inds, image_rois[:,0].contiguous().long().view(-1)), 0) # [obj_num + batch_size]
        pseudo_obj_fmaps = torch.cat((obj_fmaps.clone().detach(), image_fmap.detach()), 0)  # [obj_num + batch_size, 4096]
        virtual_embed = self.virtual_node_embed.weight[0].view(1, -1).expand(batch_size, -1)
        pseudo_obj_embed = torch.cat((obj_embed, virtual_embed), 0) # [obj_num + batch_size, embed_dim]
        if self.training or (self.mode == 'predcls'):
            pseudo_obj_labels = torch.cat((obj_labels, Variable(torch.randn(1).fill_(0).cuda()).expand(batch_size).long().view(-1)), 0)
        else:
            pseudo_obj_labels = None
        
        if self.mode == 'sgdet':
            obj_distributions = F.softmax(obj_logits, dim=1)[:,1:]
        else:
            obj_distributions = F.softmax(obj_logits[:,1:], dim=1)
        pseudo_obj_distributions = torch.cat((obj_distributions, Variable(torch.randn(batch_size, obj_distributions.shape[1]).fill_(0).cuda())), 0)
        # generate RL gen tree input
        box_embed = tree_utils.get_box_info(Variable(pseudo_box_priors)) # 8-digits
        overlap_embed, _ = tree_utils.get_overlap_info(pseudo_im_inds, Variable(pseudo_box_priors)) # 4-digits
        prepro_feat = self.feat_preprocess_net(pseudo_obj_fmaps, pseudo_obj_embed, box_embed, overlap_embed)
        pair_scores, pair_rel_gate, pair_rel_gt = self.rl_score_net(prepro_feat, pseudo_obj_distributions, co_occour, rel_labels, batch_size, im_inds, pseudo_im_inds)

        #print('node_scores', node_scores.data.cpu().numpy())
        arbitrary_forest, gen_tree_loss, entropy_loss = gen_tree.generate_forest(pseudo_im_inds, gt_forest, pair_scores, Variable(pseudo_box_priors), pseudo_obj_labels, self.use_rl_tree, self.training, self.mode)
        forest = arbitraryForest_to_biForest(arbitrary_forest)

        pseudo_pos_embed = self.pos_embed(Variable(center_size(pseudo_box_priors)))
        obj_pre_rep = torch.cat((pseudo_obj_fmaps, pseudo_obj_embed, pseudo_pos_embed), 1)
        if self.nl_obj > 0:
            obj_dists2, obj_preds, obj_ctx = self.obj_ctx(
                obj_pre_rep,
                pseudo_obj_labels,
                pseudo_box_priors,
                boxes_per_cls,
                forest,
                batch_size
            )
        else:
            print('Error, No obj ctx')

        edge_ctx = None
        if self.nl_edge > 0:
            edge_ctx = self.edge_ctx(
                torch.cat((pseudo_obj_fmaps, obj_ctx), 1) if self.pass_in_obj_feats_to_edge else obj_ctx,
                obj_preds=obj_preds,
                box_priors=pseudo_box_priors,
                forest = forest,
            )

        # draw tree
        if self.draw_tree and (self.draw_tree_count < self.draw_tree_max):
            for tree_idx in range(len(forest)):
                draw_tree_region(forest[tree_idx], origin_img, self.draw_tree_count)
                draw_tree_region_v2(forest[tree_idx], origin_img, self.draw_tree_count, obj_preds)
                self.draw_tree_count += 1

        # remove virtual nodes
        return obj_dists2, obj_preds[:-batch_size], edge_ctx[:-batch_size], gen_tree_loss, entropy_loss, pair_rel_gate, pair_rel_gt
Example #15
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_classes=None,
                gt_rels=None,
                proposals=None,
                train_anchor_inds=None,
                return_fmap=False,
                depth_imgs=None):
        """
        Forward pass for relation detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: a numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param gt_rels: [] gt relations
        :param proposals: region proposals retrieved from file
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :param return_fmap: if the object detector must return the extracted feature maps
        :param depth_imgs: depth images [batch_size, 1, IM_SIZE, IM_SIZE]
        """

        # -- Get prior `result` object (instead of calling faster-rcnn-detector)
        result = self.get_prior_results(image_offset, gt_boxes, gt_classes,
                                        gt_rels)

        # -- Get RoI and relations
        rois, rel_inds = self.get_rois_and_rels(result, image_offset, gt_boxes,
                                                gt_classes, gt_rels)

        # -- Determine subject and object indices
        subj_inds = rel_inds[:, 1]
        obj_inds = rel_inds[:, 2]

        # -- Extract features from depth backbone
        depth_features = self.depth_backbone(depth_imgs)

        # -- Prevent the gradients from flowing back to depth backbone (Pre-trained mode)
        if self.pretrained_depth:
            depth_features = depth_features.detach()

        # -- Extract RoI features for relation detection
        depth_rois_features = self.get_roi_features_depth(depth_features, rois)

        # -- Create a pairwise relation vector out of location features
        rel_depth = torch.cat(
            (depth_rois_features[subj_inds], depth_rois_features[obj_inds]), 1)
        rel_depth_fc = self.depth_rel_hlayer(rel_depth)

        # -- Predict relation distances
        result.rel_dists = self.depth_rel_out(rel_depth_fc)

        # --- *** END OF ARCHITECTURE *** ---#

        # -- Prepare object predictions vector (PredCLS)
        # Assuming it's predcls
        obj_labels = result.rm_obj_labels if self.training or self.mode == 'predcls' else None
        # One hot vector of objects
        result.rm_obj_dists = Variable(
            to_onehot(obj_labels.data, self.num_classes))
        # Indexed vector
        result.obj_preds = obj_labels if obj_labels is not None else result.rm_obj_dists[:, 1:].max(1)[1] + 1

        if self.training:
            return result

        twod_inds = arange(
            result.obj_preds.data) * self.num_classes + result.obj_preds.data
        result.obj_scores = F.softmax(result.rm_obj_dists,
                                      dim=1).view(-1)[twod_inds]

        # Boxes will get fixed by filter_dets function.
        bboxes = result.rm_box_priors

        rel_rep = F.softmax(result.rel_dists, dim=1)
        # Filtering: Subject_Score * Pred_score * Obj_score, sorted and ranked
        return filter_dets(bboxes, result.obj_scores, result.obj_preds,
                           rel_inds[:, 1:], rel_rep)
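
twod_inds = arange(result.obj_preds.data) * self.num_classes + result.obj_preds.data flattens the softmax matrix and picks, for every box, the probability of its predicted class. The same selection can be written with gather; a short sketch demonstrating the equivalence:

import torch
import torch.nn.functional as F

num_classes = 5
obj_dists = torch.randn(3, num_classes)
obj_preds = torch.tensor([2, 0, 4])

twod_inds = torch.arange(obj_preds.size(0)) * num_classes + obj_preds
scores_flat = F.softmax(obj_dists, dim=1).view(-1)[twod_inds]
scores_gather = F.softmax(obj_dists, dim=1).gather(1, obj_preds.view(-1, 1)).view(-1)
assert torch.allclose(scores_flat, scores_gather)
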
Example #16
    def forward(self,
                obj_fmaps,
                obj_logits,
                im_inds,
                obj_labels=None,
                box_priors=None,
                boxes_per_cls=None):
        """
        Forward pass through the object and edge context
        :param obj_priors: from faster rcnn output boxes
        :param obj_fmaps: 4096-dim roi feature maps
        :param obj_logits: result.rm_obj_dists.detach()
        :param im_inds:
        :param obj_labels: od_obj_labels, gt
        :param boxes:
        :return: obj_dists2: [#boxes, 151], new score for boxes
                 obj_preds: [#boxes], prediction/class value
                 edge_ctx: [#boxes, 512], new features for boxes

        """

        # Object State:
        # obj_embed: [#boxes, 200], and self.obj_embed.weight are both Variable
        # obj_logits: result.rm_obj_dists.detach(), [#boxes, 151], detector scores before softmax
        obj_embed = F.softmax(obj_logits, dim=1) @ self.obj_embed.weight
        # center_size returns boxes as (center_x, center_y, width, height)
        # pos_embed: [#boxes, 128], Variable, from boxes after Sequential processing
        pos_embed = self.pos_embed(Variable(center_size(box_priors)))
        # obj_pre_rep: [#boxes, 4424], Variable
        obj_pre_rep = torch.cat((obj_fmaps, obj_embed, pos_embed), 1)

        if self.nl_obj > 0:
            # obj_dists2: [#boxes, 151], new score for box
            # obj_preds: [#boxes], prediction/class value
            # obj_ctx: [#boxes, 512], new features vector for box
            obj_dists2, obj_preds, obj_ctx = self.obj_ctx(
                obj_pre_rep,  #obj_fmaps,  # original: obj_pre_rep,
                obj_logits,
                im_inds,
                obj_labels,
                box_priors,
                boxes_per_cls,
            )
        else:
            # UNSURE WHAT TO DO HERE
            if self.mode == 'predcls':
                obj_dists2 = Variable(
                    to_onehot(obj_labels.data, self.num_classes))
            else:
                obj_dists2 = self.decoder_lin(obj_pre_rep)

            if self.mode == 'sgdet' and not self.training:
                # NMS here for baseline

                probs = F.softmax(obj_dists2, 1)
                nms_mask = obj_dists2.data.clone()
                nms_mask.zero_()
                for c_i in range(1, obj_dists2.size(1)):
                    scores_ci = probs.data[:, c_i]
                    boxes_ci = boxes_per_cls.data[:, c_i]

                    keep = apply_nms(scores_ci,
                                     boxes_ci,
                                     pre_nms_topn=scores_ci.size(0),
                                     post_nms_topn=scores_ci.size(0),
                                     nms_thresh=0.3)
                    nms_mask[:, c_i][keep] = 1

                obj_preds = Variable(nms_mask * probs.data, volatile=True)[:, 1:].max(1)[1] + 1
            else:
                obj_preds = obj_labels if obj_labels is not None else obj_dists2[:, 1:].max(1)[1] + 1
            obj_ctx = obj_pre_rep

        # Edge State:
        edge_ctx = None

        if self.nl_edge > 0:
            # edge_ctx: [#boxes, 512]
            edge_ctx = self.edge_ctx(
                torch.cat((obj_fmaps, obj_ctx), 1)
                if self.pass_in_obj_feats_to_edge else obj_ctx,
                obj_dists=obj_dists2.detach(),  # Was previously obj_logits.
                im_inds=im_inds,
                obj_preds=obj_preds,
                box_priors=box_priors,
            )

        return obj_dists2, obj_preds, edge_ctx
    def forward(self,
                x,
                im_sizes,
                image_offset,
                gt_boxes=None,
                gt_masks=None,
                gt_classes=None,
                gt_rels=None,
                pred_boxes=None,
                pred_masks=None,
                pred_fmaps=None,
                pred_dists=None):
        """
        Forward pass for detection
        :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
        :param im_sizes: A numpy array of (h, w, scale) for each image.
        :param image_offset: Offset onto what image we're on for MGPU training (if single GPU this is 0)
        :param gt_boxes:

        Training parameters:
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
        :param train_anchor_inds: a [num_train, 2] array of indices for the anchors that will
                                  be used to compute the training loss. Each (img_ind, fpn_idx)
        :return: If train:
            scores, boxdeltas, labels, boxes, boxtargets, rpnscores, rpnboxes, rellabels

            if test:
            prob dists, boxes, img inds, maxscores, classes

        pred_fmaps  N*256*14*14
        pred_boxes  N*4
        pred_masks  N*28*28
        pred_dists  N*85

        """
        #print(pred_fmaps.shape, pred_boxes.shape, pred_masks.shape, pred_dists.shape)

        if self.training:
            im_inds = gt_classes[:, 0]
            rois = torch.cat((im_inds.float()[:, None], gt_boxes), 1)
            # this is actually the relation assignment for sgcls
            # assign the GT relation labels; the RoIs themselves do not change
            rois, labels, rel_labels = proposal_assignments_gtbox(
                rois.data, gt_boxes.data, gt_classes.data, gt_rels.data,
                image_offset)
            #boxes = rois[:, 1:]
            pred_boxes = rois[:, 1:]
            pred_masks = gt_masks
            pred_dists = Variable(to_onehot(labels.data, self.num_classes))
        else:
            im_inds = pred_boxes[:, 0].long()
            pred_boxes = pred_boxes[:, 1:]
            labels = gt_classes[:, 1]
            rel_labels = None
            pred_dists = Variable(
                to_onehot(pred_dists.data.long(), self.num_classes))
            rois = torch.cat((im_inds[:, None].float(), pred_boxes), 1)

        result = Result()
        #pred_fmaps = pred_fmaps * self.downsample(pred_masks[:, None, :, :])
        #result.obj_fmap = self.roi_fmap_obj(pred_fmaps.view(len(pred_fmaps), -1))
        result.obj_fmap = self.obj_feature_map(pred_fmaps, rois)
        result.rm_obj_dists = pred_dists
        result.rm_obj_labels = labels
        result.rel_labels = rel_labels
        #result.boxes_all = None
        rel_inds = self.get_rel_inds(result.rel_labels, im_inds, pred_boxes)
        #rois = torch.cat((im_inds[:, None].float(), boxes), 1)

        # result.obj_fmap = self.obj_feature_map(result.fmap, rois)
        #  print(pred_fmaps[0][0][0])
        #  print(result.rm_obj_labels[0])
        #  print(result.rm_obj_dists[0][:10])
        #  print(pred_boxes.data[[0]])
        # Prevent gradients from flowing back into score_fc from elsewhere
        result.rm_obj_dists, result.obj_preds, edge_ctx = self.context(
            result.obj_fmap, result.rm_obj_dists, im_inds, result.rm_obj_labels
            if self.training or self.mode == 'predcls' else None,
            pred_boxes.data, None)

        #print(fdsafds)
        if edge_ctx is None:
            edge_rep = self.post_emb(result.obj_preds)
        else:
            edge_rep = self.post_lstm(edge_ctx)

        # Split into subject and object representations
        edge_rep = edge_rep.view(edge_rep.size(0), 2, self.pooling_dim)

        subj_rep = edge_rep[:, 0]
        obj_rep = edge_rep[:, 1]

        prod_rep = subj_rep[rel_inds[:, 1]] * obj_rep[rel_inds[:, 2]]
        vr = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:])
        prod_rep = prod_rep * vr
        # if self.use_vision:
        #     vr = self.visual_rep(pred_fmaps, rois, rel_inds[:, 1:])
        #     if self.limit_vision:
        #         # exact value TBD
        #         prod_rep = torch.cat((prod_rep[:, :2048] * vr[:, :2048], prod_rep[:, 2048:]), 1)
        #     else:
        #         prod_rep = prod_rep * vr

        if self.use_tanh:
            prod_rep = F.tanh(prod_rep)

        result.rel_dists = self.rel_compress(prod_rep)

        if self.use_bias:
            result.rel_dists = result.rel_dists + self.freq_bias.index_with_labels(
                torch.stack((
                    result.obj_preds[rel_inds[:, 1]],
                    result.obj_preds[rel_inds[:, 2]],
                ), 1))

        if self.training:
            return result

        twod_inds = arange(
            result.obj_preds.data) * self.num_classes + result.obj_preds.data
        result.obj_scores = F.softmax(result.rm_obj_dists,
                                      dim=1).view(-1)[twod_inds]

        # # Bbox regression
        # if self.mode == 'sgdet':
        #     bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(result.boxes_all.size(0), 4)
        # else:
        #     # Boxes will get fixed by filter_dets function.
        #     bboxes = result.rm_box_priors

        rel_rep = F.softmax(result.rel_dists, dim=1)
        return filter_dets_mask(pred_boxes, pred_masks, result.obj_scores,
                                result.obj_preds, rel_inds[:, 1:], rel_rep)