Code example #1
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                use_gt_boxes=False):
        """Detect objects, attributes and relations in a batch of images.

        Args:
            im_data: image batch, shape (B, C, H, W).
            im_info: per-image metadata consumed by the RPN.
            gt_boxes: ground-truth boxes; box coordinates are assumed to be
                in columns 0:4 of the last dimension -- TODO confirm layout
                against the data loader.
            num_boxes: number of valid ground-truth boxes per image.
            use_gt_boxes: when True, bypass the RPN and use the ground-truth
                boxes directly as region proposals.

        Returns:
            A tuple of rois, predictions and losses; the exact arity depends
            on cfg.HAS_ATTRIBUTES / cfg.HAS_RELATIONS, self.training and
            self.ext_feat (see the return statements below).
        """
        batch_size = im_data.size(0)
        # Shared convolutional feature map for all downstream heads.
        base_feat = self.RCNN_base_model(im_data)

        if not use_gt_boxes:
            # Feed the base feature map to the RPN to obtain rois.
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                base_feat, im_info.data, gt_boxes.data, num_boxes.data)
        else:
            # Otherwise use the ground-truth boxes as the RPN output:
            # column 0 is the batch index, columns 1:5 the box coordinates.
            rois = gt_boxes.data.clone()
            rois[0, :, 0] = 0
            rois[0, :, 1:] = gt_boxes.data[0, :, :4]
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        if not self.training:
            if batch_size == 1:
                # Drop zero-padded rois; this trick only works for a
                # single-image batch.
                valid = rois.sum(2).view(-1).nonzero().view(-1)
                rois = rois[:, valid, :]

        rpn_loss = rpn_loss_cls + rpn_loss_bbox

        # In the training phase, use the ground-truth boxes for refining.
        if self.training:
            # NOTE(review): the result of this call is never used (its
            # unpacking was commented out upstream); kept because the
            # proposal-target layer samples randomly, so removing the call
            # would shift the RNG stream.
            roi_data = self.RCNN_proposal_target(rois, gt_boxes.data)

            roi_data_msdn = self.RCNN_proposal_target_msdn(rois, gt_boxes.data)

            rois, roi_rel_pairs, roi_pair_proposals, rois_obj_label, rois_att_label, rois_rel_label, \
            rois_target, rois_inside_ws, rois_outside_ws = roi_data_msdn
            rois_obj_label = Variable(rois_obj_label.view(-1))
            rois_att_label = Variable(
                rois_att_label.view(-1, self.n_att_classes))
            rois_rel_label = Variable(rois_rel_label.view(-1))
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            # Each row of roi_pair_proposals indexes a (subject, object)
            # pair in the flattened roi list.
            roi_pair_proposals = roi_pair_proposals.long()
            roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
            ind_subject = roi_pair_proposals_v[:, 0]
            ind_object = roi_pair_proposals_v[:, 1]
        else:
            rois_obj_label = None
            rois_att_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

            # Build relation pair proposals for each image in the batch.
            rois_out = []
            roi_rel_pairs_out = []
            roi_pair_proposals_out = []
            for i in range(rois.size(0)):
                # BUG FIX: the original assigned the per-image results back
                # to `rois`, clobbering the tensor indexed by rois[i] on the
                # next iteration; use per-image names instead.
                rois_i, roi_rel_pairs_i, roi_pair_proposals_i = \
                    self._setup_connection(rois[i])
                rois_out.append(rois_i)
                roi_rel_pairs_out.append(roi_rel_pairs_i)
                roi_pair_proposals_out.append(roi_pair_proposals_i)

            rois = torch.stack(rois_out, 0)
            roi_rel_pairs = torch.stack(roi_rel_pairs_out, 0)
            roi_pair_proposals = torch.stack(roi_pair_proposals_out, 0)

            roi_pair_proposals = roi_pair_proposals.long()
            roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
            ind_subject = roi_pair_proposals_v[:, 0]
            ind_object = roi_pair_proposals_v[:, 1]

        rois = Variable(rois)

        # Pool a fixed-size feature for every roi.
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # Feed pooled features to the top model.
        x_obj = self._head_to_tail(pooled_feat)  # (B x N) x D

        # Object classification scores and box regression deltas.
        obj_cls_score = self.RCNN_obj_cls_score(x_obj)
        obj_cls_prob = F.softmax(obj_cls_score)
        bbox_pred = self.RCNN_bbox_pred(x_obj)

        if cfg.HAS_ATTRIBUTES:
            # Attribute head runs on its own tail features.
            x_att = self._head_to_tail_att(pooled_feat)  # (B x N) x D
            att_cls_score = self.RCNN_att_cls_score(x_att)
            att_cls_prob = F.softmax(att_cls_score)
            att_cls_log_prob = F.log_softmax(att_cls_score)

        if cfg.HAS_RELATIONS:
            # Pool a feature over the union box of every (subject, object)
            # pair and classify the relation.
            rois_pred = combine_box_pairs(roi_rel_pairs.view(-1, 9))
            rois_pred = Variable(rois_pred)

            # Do roi pooling based on the predicted (union) rois.
            if cfg.POOLING_MODE == 'crop':
                grid_xy = _affine_grid_gen(rois_pred.view(-1, 5),
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_pred_feat = self.RELPN_roi_crop(
                    base_feat,
                    Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    # BUG FIX: the original max-pooled `pooled_feat` (the
                    # object features) here instead of the relation features.
                    pooled_pred_feat = F.max_pool2d(pooled_pred_feat, 2, 2)
            elif cfg.POOLING_MODE == 'align':
                pooled_pred_feat = self.RELPN_roi_align(
                    base_feat, rois_pred.view(-1, 5))
            elif cfg.POOLING_MODE == 'pool':
                pooled_pred_feat = self.RELPN_roi_pool(base_feat,
                                                       rois_pred.view(-1, 5))

            # Relation features from the union-box pooled features.
            x_pred = self._head_to_tail_rel(pooled_pred_feat)

            x_rel = x_pred

            # Relation classification scores.
            rel_cls_score = self.RCNN_rel_cls_score(x_rel)
            rel_cls_prob = F.softmax(rel_cls_score)

        if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:
            # NOTE(review): this branch reads x_att and x_rel, so it assumes
            # cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS are both enabled.

            if cfg.GCN_HAS_ATTENTION:
                # One attention weight per relation, computed from the
                # subject and object class scores.
                x_sobj = obj_cls_score[ind_subject]
                x_oobj = obj_cls_score[ind_object]
                attend_score = self.GRCNN_gcn_att1(x_sobj, x_oobj)  # N_rel x 1
                attend_score = attend_score.view(1, x_pred.size(0))
            else:
                attend_score = Variable(
                    x_rel.data.new(1, x_pred.size(0)).fill_(1))

            # Build the initial adjacency maps (map_obj_att, map_obj_obj,
            # map_obj_rel) that wire the graph passed to the GCN.
            assert x_obj.size() == x_att.size(
            ), "the numbers of object features and attribute features should be the same"

            # BUG FIX: integer division -- plain `/` yields a float under
            # Python 3 and breaks torch.eye() and the slicing below.
            size_per_batch = x_obj.size(0) // batch_size

            # Objects and their attributes are linked one-to-one.
            map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)

            if cfg.MUTE_ATTRIBUTES:
                map_obj_att.zero_()
                x_att = x_att.detach()

            map_obj_att = Variable(map_obj_att)

            # Fully connect objects within the same image, minus self-loops.
            map_obj_obj = x_obj.data.new(x_obj.size(0),
                                         x_obj.size(0)).fill_(0.0)
            eye_mat = torch.eye(size_per_batch).type_as(x_obj.data)
            for i in range(batch_size):
                map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch, i *
                            size_per_batch:(i + 1) * size_per_batch].fill_(1.0)
                map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch, i * size_per_batch:(i + 1) * size_per_batch] =\
                    map_obj_obj[i * size_per_batch:(i + 1) * size_per_batch, i * size_per_batch:(i + 1) * size_per_batch]\
                    - eye_mat

            map_obj_obj = Variable(map_obj_obj)

            # Object-to-relation maps, weighted by the attention scores.
            map_sobj_rel = Variable(
                x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_())
            map_sobj_rel.scatter_(
                0, Variable(ind_subject.contiguous().view(1, x_rel.size(0))),
                attend_score)
            map_oobj_rel = Variable(
                x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_())
            map_oobj_rel.scatter_(
                0, Variable(ind_object.contiguous().view(1, x_rel.size(0))),
                attend_score)
            map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 1)

            if cfg.MUTE_RELATIONS:
                map_obj_rel.data.zero_()
                x_rel = x_rel.detach()

            mat_phrase = Variable(torch.stack((ind_subject, ind_object), 1))

            for i in range(cfg.GCN_LAYERS):
                # Pass the graph representation through one gcn layer.
                x_obj, x_rel = self.imp(x_obj, x_rel, map_obj_rel, mat_phrase)

            # Re-score objects with the GCN-refined features.
            obj_cls_score = self.RCNN_obj_cls_score(x_obj)
            obj_cls_prob = F.softmax(obj_cls_score)

            # Re-score attributes.
            att_cls_score = self.RCNN_att_cls_score(x_att)
            att_cls_prob = F.softmax(att_cls_score)
            att_cls_log_prob = F.log_softmax(att_cls_score)

            # Re-score relations. NOTE(review): this resets x_rel to the
            # pre-GCN relation features, discarding the refined x_rel from
            # the loop above -- confirm this is intentional.
            x_rel = x_pred
            rel_cls_score = self.RCNN_rel_cls_score(x_rel)
            rel_cls_prob = F.softmax(rel_cls_score)

        self.RCNN_loss_bbox = 0
        self.RCNN_loss_obj_cls = 0
        self.RCNN_loss_att_cls = 0
        self.RCNN_loss_rel_cls = 0

        if self.training:
            # Foreground / background roi counts, kept for logging.
            self.fg_cnt = torch.sum(rois_obj_label.data.ne(0))
            self.bg_cnt = rois_obj_label.data.numel() - self.fg_cnt
            self.RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                  rois_inside_ws,
                                                  rois_outside_ws)

            # Object classification loss.
            obj_label = rois_obj_label.long()
            self.RCNN_loss_obj_cls = F.cross_entropy(obj_cls_score, obj_label)

            # Attribute classification loss, on foreground rois only.
            if cfg.HAS_ATTRIBUTES:
                att_label = rois_att_label
                att_label = att_label[rois_obj_label.data.nonzero().squeeze()]
                att_cls_log_prob = att_cls_log_prob[
                    rois_obj_label.data.nonzero().squeeze()]
                self.RCNN_loss_att_cls = _softmax_with_loss(
                    att_cls_log_prob, att_label)

            # Relation classification loss.
            if cfg.HAS_RELATIONS:
                self.rel_fg_cnt = torch.sum(rois_rel_label.data.ne(0))
                self.rel_bg_cnt = rois_rel_label.data.numel() - self.rel_fg_cnt
                rel_label = rois_rel_label.long()
                self.RCNN_loss_rel_cls = F.cross_entropy(
                    rel_cls_score, rel_label)

        rcnn_loss = self.RCNN_loss_bbox + self.RCNN_loss_obj_cls

        if cfg.HAS_ATTRIBUTES and not cfg.MUTE_ATTRIBUTES:
            rcnn_loss += cfg.WEIGHT_ATTRIBUTES * self.RCNN_loss_att_cls

        if cfg.HAS_RELATIONS and not cfg.MUTE_RELATIONS:
            rcnn_loss += cfg.WEIGHT_RELATIONS * self.RCNN_loss_rel_cls

        # Reshape flat predictions back to (batch, rois, -1).
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        obj_cls_prob = obj_cls_prob.view(batch_size, rois.size(1), -1)
        att_cls_prob = None if not cfg.HAS_ATTRIBUTES else att_cls_prob.view(
            batch_size, rois.size(1), -1)
        # BUG FIX: integer division again so view() receives an int size.
        rel_cls_prob = None if not cfg.HAS_RELATIONS else rel_cls_prob.view(
            batch_size,
            rel_cls_prob.size(0) // batch_size, -1)

        if self.ext_feat:
            # Export raw features and scores for external consumers.
            rel_pairs = roi_pair_proposals
            return base_feat, rois.data, rel_pairs, bbox_pred.data, x_obj.data, x_att.data, x_rel.data, \
                    obj_cls_prob.data, att_cls_prob.data, rel_cls_prob.data, \
                    obj_cls_score.data, att_cls_score.data, rel_cls_score.data

        if cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS:
            if self.training:
                return rois, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, rcnn_loss
            else:
                rel_pairs = roi_pair_proposals
                return rois, rel_pairs, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, rcnn_loss
        elif cfg.HAS_ATTRIBUTES:
            return rois, bbox_pred, obj_cls_prob, att_cls_prob, rpn_loss, rcnn_loss
        else:
            return rois, bbox_pred, obj_cls_prob, rpn_loss, rcnn_loss
Code example #2
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                use_gt_boxes=False):
        """Detect objects, attributes and relations for a batch of images.

        Args:
            im_data: image batch, shape (B, C, H, W).
            im_info: per-image metadata consumed by the RPN.
            gt_boxes: ground-truth boxes; used directly as rois when
                use_gt_boxes is True.
            num_boxes: number of valid ground-truth boxes per image.
            use_gt_boxes: when True, bypass the RPN and use gt_boxes as rois.

        Returns:
            A tuple of rois, predictions and losses; the exact arity depends
            on cfg.HAS_ATTRIBUTES / cfg.HAS_RELATIONS and self.training
            (see the return statements below).
        """
        batch_size = im_data.size(0)
        # Shared convolutional feature map for all downstream heads.
        base_feat = self.RCNN_base_model(im_data)

        if not use_gt_boxes:
            # Feed the base feature map to the RPN to obtain rois.
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                base_feat, im_info.data, gt_boxes.data, num_boxes.data)
        else:
            # Otherwise use the ground-truth boxes as the output of RCNN_rpn.
            rois = gt_boxes
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rpn_loss = rpn_loss_cls + rpn_loss_bbox

        # In the training phase, use ground-truth bboxes for refining.
        if self.training:
            # Sample rois and match them to ground truth for the losses.
            roi_data = self.RCNN_proposal_target(rois, gt_boxes.data)
            rois, rois_obj_label, rois_att_label, \
            rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_obj_label = Variable(rois_obj_label.view(-1))
            rois_att_label = Variable(
                rois_att_label.view(-1, self.n_att_classes))
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            # No training targets at test time.
            rois_obj_label = None
            rois_att_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # Pool a fixed-size feature for every roi.
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # Feed pooled features to the top model.
        x_obj = self._head_to_tail(pooled_feat)  # (B x N) x D

        # Object classification scores and box regression deltas.
        obj_cls_score = self.RCNN_obj_cls_score(x_obj)
        obj_cls_prob = F.softmax(obj_cls_score)
        bbox_pred = self.RCNN_bbox_pred(x_obj)

        if cfg.HAS_ATTRIBUTES:
            # Attribute head runs on its own tail features.
            x_att = self._head_to_tail_att(pooled_feat)  # (B x N) x D
            att_cls_score = self.RCNN_att_cls_score(x_att)
            att_cls_prob = F.softmax(att_cls_score)
            att_cls_log_prob = F.log_softmax(att_cls_score)

        if cfg.HAS_RELATIONS:
            # Propose (subject, object) roi pairs with the relation proposal
            # network, conditioned on the per-roi object class scores.
            # x_view = x.view(rois.size(0), rois.size(1), x.size(1))
            rel_feats = obj_cls_score.view(rois.size(0), rois.size(1),
                                           obj_cls_score.size(1))
            roi_rel_pairs, roi_pair_proposals, relpn_loss_cls = \
                self.RELPN_rpn(rois.data, rel_feats, im_info.data, gt_boxes.data, num_boxes.data)

            relpn_loss = relpn_loss_cls

            # Each row of roi_pair_proposals is (subject_idx, object_idx)
            # into the flattened roi list.
            roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
            ind_subject = roi_pair_proposals_v[:, 0]
            ind_object = roi_pair_proposals_v[:, 1]

            if self.training:

                # Sample relation pairs against ground truth.
                roi_pair_data = self.RELPN_proposal_target(
                    roi_rel_pairs, gt_boxes.data, num_boxes.data)

                # pdb.set_trace()

                roi_rel_pairs, rois_rel_label, roi_pair_keep = roi_pair_data
                rois_rel_label = Variable(rois_rel_label.view(-1))

                # Offset the kept pair indices into the flattened proposal
                # list. NOTE(review): relies on `/` being integer division;
                # under Python 3 this produces floats that are only truncated
                # by the .long() below -- confirm the intended rounding.
                roi_pair_keep = roi_pair_keep + torch.arange(0, roi_pair_keep.size(0)).view(roi_pair_keep.size(0), 1).cuda() \
                                                * roi_pair_proposals_v.size(0) / batch_size
                roi_pair_keep = roi_pair_keep.view(-1).long()

                ind_subject = roi_pair_proposals_v[roi_pair_keep][:, 0]
                ind_object = roi_pair_proposals_v[roi_pair_keep][:, 1]

            # Union box of every pair, used for relation feature pooling.
            rois_pred = combine_box_pairs(roi_rel_pairs.view(-1, 9))
            rois_pred = Variable(rois_pred)

            # Do roi pooling based on the predicted (union) rois.
            pooled_pred_feat = self.RELPN_roi_pool(base_feat,
                                                   rois_pred.view(-1, 5))

            # Relation features from the union-box pooled features.
            x_pred = self._head_to_tail_rel(pooled_pred_feat)

            # x_sobj = x_obj[ind_subject]
            # x_oobj = x_obj[ind_object]

            x_rel = x_pred  #torch.cat((x_sobj, x_pred, x_oobj), 1)

            # Relation classification scores.
            rel_cls_score = self.RCNN_rel_cls_score(x_rel)
            rel_cls_prob = F.softmax(rel_cls_score)

        if not cfg.GCN_ON_SCORES and cfg.GCN_LAYERS > 0 and cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS:
            # GCN over *features*: build the initial adjacency maps
            # (map_obj_att, map_obj_obj, map_obj_rel).
            # NOTE we have two ways to compute the map among objects: one is
            # based on the overlaps among object rois (rois with overlaps
            # should share common features), the other on classification
            # scores (objects co-occur, e.g. buses appear on roads).
            assert x_obj.size() == x_att.size(
            ), "the numbers of object features and attribute features should be the same"
            # Objects and their attributes are linked one-to-one.
            map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)
            # Fully connected among objects, minus self-loops.
            map_obj_obj = x_obj.data.new(x_obj.size(0),
                                         x_obj.size(0)).fill_(1.0) - torch.eye(
                                             x_obj.size(0)).type_as(x_obj.data)
            # Mark each relation's subject (resp. object) roi with a 1.
            map_sobj_rel = x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_()
            map_sobj_rel.scatter_(
                0,
                ind_subject.contiguous().view(1, x_rel.size(0)),
                x_rel.data.new(1, x_pred.size(0)).fill_(1))
            map_oobj_rel = x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_()
            map_oobj_rel.scatter_(
                0,
                ind_object.contiguous().view(1, x_rel.size(0)),
                x_rel.data.new(1, x_pred.size(0)).fill_(1))
            map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 2)

            map_obj_att = Variable(map_obj_att)
            map_obj_obj = Variable(map_obj_obj)
            map_obj_rel = Variable(map_obj_rel)

            # Project each modality into the GCN input space.
            x_obj = F.relu(self.fc4obj(x_obj))
            x_att = F.relu(self.fc4att(x_att))
            x_pred = F.relu(self.fc4rel(x_pred))

            for i in range(cfg.GCN_LAYERS):
                # Pass the graph representation through one gcn layer.
                x_obj, x_att, x_pred = self.GRCNN_gcn(x_obj, x_att, x_pred,
                                                      map_obj_att, map_obj_obj,
                                                      map_obj_rel)

                # pdb.set_trace()
                # Re-score objects with the refined features.
                obj_cls_score = self.RCNN_gcn_obj_cls_score(x_obj)
                obj_cls_prob = F.softmax(obj_cls_score)

                # Re-score attributes.
                att_cls_score = self.RCNN_gcn_att_cls_score(x_att)
                att_cls_prob = F.softmax(att_cls_score)
                att_cls_log_prob = F.log_softmax(att_cls_score)

                # Re-score relations.
                # x_sobj = x_obj[ind_subject]
                # x_oobj = x_obj[ind_object]
                x_rel = x_pred  # torch.cat((x_sobj, x_pred, x_oobj), 1)
                rel_cls_score = self.RCNN_gcn_rel_cls_score(x_rel)
                rel_cls_prob = F.softmax(rel_cls_score)

        if cfg.GCN_ON_SCORES and cfg.GCN_LAYERS > 0 and cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS:
            # GCN over *scores*: same adjacency maps as above, but message
            # passing operates directly on the classification scores.

            assert x_obj.size() == x_att.size(
            ), "the numbers of object features and attribute features should be the same"
            map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)
            map_obj_obj = x_obj.data.new(x_obj.size(0),
                                         x_obj.size(0)).fill_(1.0) - torch.eye(
                                             x_obj.size(0)).type_as(x_obj.data)
            map_sobj_rel = x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_()
            map_sobj_rel.scatter_(
                0,
                ind_subject.contiguous().view(1, x_rel.size(0)),
                x_rel.data.new(1, x_pred.size(0)).fill_(1))
            map_oobj_rel = x_obj.data.new(x_obj.size(0), x_rel.size(0)).zero_()
            map_oobj_rel.scatter_(
                0,
                ind_object.contiguous().view(1, x_rel.size(0)),
                x_rel.data.new(1, x_pred.size(0)).fill_(1))
            map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 2)

            map_obj_att = Variable(map_obj_att)
            map_obj_obj = Variable(map_obj_obj)
            map_obj_rel = Variable(map_obj_rel)

            for i in range(cfg.GCN_LAYERS):
                # Pass the score graph through one gcn layer.
                obj_cls_score, att_cls_score, rel_cls_score =\
                    self.GRCNN_gcn(obj_cls_score, att_cls_score, rel_cls_score, map_obj_obj, map_obj_att, map_obj_rel)

                # Object classification probabilities.
                obj_cls_prob = F.softmax(obj_cls_score)

                # Attribute classification probabilities.
                att_cls_prob = F.softmax(att_cls_score)
                att_cls_log_prob = F.log_softmax(att_cls_score)

                # Relation classification probabilities.
                rel_cls_prob = F.softmax(rel_cls_score)

        self.RCNN_loss_bbox = 0
        self.RCNN_loss_obj_cls = 0
        self.RCNN_loss_att_cls = 0
        self.RCNN_loss_rel_cls = 0

        if self.training:

            # Foreground / background roi counts, kept for logging.
            self.fg_cnt = torch.sum(rois_obj_label.data.ne(0))
            self.bg_cnt = rois_obj_label.data.numel() - self.fg_cnt
            self.RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                  rois_inside_ws,
                                                  rois_outside_ws)

            # Object classification loss.
            obj_label = rois_obj_label.long()
            self.RCNN_loss_obj_cls = F.cross_entropy(obj_cls_score, obj_label)

            # Attribute classification loss, on foreground rois only.
            if cfg.HAS_ATTRIBUTES:
                att_label = rois_att_label
                att_label = att_label[rois_obj_label.data.nonzero().squeeze()]
                # att_cls_score = att_cls_score[rois_obj_label.data.nonzero().squeeze()]
                # self.RCNN_loss_att_cls = F.multilabel_soft_margin_loss(att_cls_score, att_label)
                att_cls_log_prob = att_cls_log_prob[
                    rois_obj_label.data.nonzero().squeeze()]
                self.RCNN_loss_att_cls = _softmax_with_loss(
                    att_cls_log_prob, att_label)

            # Relation classification loss.
            if cfg.HAS_RELATIONS:
                self.rel_fg_cnt = torch.sum(rois_rel_label.data.ne(0))
                self.rel_bg_cnt = rois_rel_label.data.numel() - self.rel_fg_cnt
                rel_label = rois_rel_label.long()
                self.RCNN_loss_rel_cls = F.cross_entropy(
                    rel_cls_score, rel_label)

        rcnn_loss = self.RCNN_loss_bbox + self.RCNN_loss_obj_cls

        # Fixed 0.5 weights for the auxiliary losses.
        if cfg.HAS_ATTRIBUTES:
            rcnn_loss += 0.5 * self.RCNN_loss_att_cls

        if cfg.HAS_RELATIONS:
            rcnn_loss += 0.5 * self.RCNN_loss_rel_cls

        # Reshape flat predictions back to (batch, rois, -1).
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        obj_cls_prob = obj_cls_prob.view(batch_size, rois.size(1), -1)
        att_cls_prob = None if not cfg.HAS_ATTRIBUTES else att_cls_prob.view(
            batch_size, rois.size(1), -1)
        rel_cls_prob = None if not cfg.HAS_RELATIONS else rel_cls_prob.view(
            batch_size, rois.size(1), -1)

        if cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS:
            if self.training:
                return rois, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, relpn_loss, rcnn_loss
            else:
                rel_pairs = roi_pair_proposals
                return rois, rel_pairs, bbox_pred, obj_cls_prob, att_cls_prob, rel_cls_prob, rpn_loss, relpn_loss, rcnn_loss
        elif cfg.HAS_ATTRIBUTES:
            return rois, bbox_pred, obj_cls_prob, att_cls_prob, rpn_loss, rcnn_loss
        else:
            return rois, bbox_pred, obj_cls_prob, rpn_loss, rcnn_loss
Code example #3
File: test_grcnn.py  Project: gyq716/my_graph_rcnn
            if inds.numel() > 0:
                rels_val = rels[inds]
                rel_cls_scores = rel_scores[:, j][inds]
                _, order = torch.sort(rel_cls_scores, 0, True)
                proposals_subject = pred_boxes[rels_val[order][:, 0]]
                proposals_object = pred_boxes[rels_val[order][:, 1]]
                rel_cls_scores = rel_cls_scores[order]
                keep = co_nms(
                    torch.cat((proposals_subject, proposals_object), 1),
                    cfg.TEST.NMS)
                keep = keep.long().view(-1)
                rel_cls_scores = rel_cls_scores[keep]
                roi_rel_pairs = pred_boxes.new(keep.size(0), 9).zero_()
                roi_rel_pairs[:, 1:5] = proposals_subject[keep]
                roi_rel_pairs[:, 5:9] = proposals_object[keep]
                rois_rel = combine_box_pairs(roi_rel_pairs)
                if vis:
                    im2show_rel = vis_relations(im2show_rel,
                                                imdb._relations[j],
                                                rel_cls_scores.cpu().numpy(),
                                                rois_rel[:, 1:].cpu().numpy(),
                                                0.5)

            # pdb.set_trace()
            # keep_idx_i = co_nms(torch.cat((proposals_subject, proposals_object), 1), nms_thresh)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
Code example #4
def ext_feat_pred_hdf5_v2(model, im_data, im_info, gt_boxes, num_boxes):
    """Extract graph representations from one image for saving into HDF5.

    Runs the full graph-RCNN pipeline (RPN -> RoI pooling -> relation
    proposal -> GCN refinement) in inference mode and returns the global,
    per-object, per-attribute and per-relation feature tensors as numpy
    arrays.

    Args:
        model: graph-RCNN network exposing RCNN_base_model, RCNN_rpn,
            the RoI pooling heads, RELPN_* and GRCNN_* sub-modules.
        im_data: image tensor; first dimension is the batch.
        im_info, gt_boxes, num_boxes: standard Faster-RCNN inputs.

    Returns:
        Tuple of numpy arrays:
        (global_obj_feat, global_att_feat, obj_feat, att_feat, rel_feat).

    NOTE(review): relies on module-level globals `vis`, `imdb_vg` and
    `MAX_REL_PAIRS` — confirm they are defined in this script. Also assumes
    cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0; otherwise `rel_cls_prob`
    below is never assigned and the function raises NameError.
    """
    # Fix: `batch_size` was used in the map-building loop below but never
    # defined in this function (NameError at runtime).
    batch_size = im_data.size(0)

    base_feat = model.RCNN_base_model(im_data)
    # RPN losses are irrelevant during feature extraction; keep only rois.
    rois, rpn_loss_cls, rpn_loss_bbox = model.RCNN_rpn(base_feat, im_info.data,
                                                       gt_boxes.data,
                                                       num_boxes.data)

    # Drop all-zero (padding) proposals.
    valid = rois.sum(2).view(-1).nonzero().view(-1)
    rois = rois[:, valid, :]

    rois = Variable(rois)
    if cfg.POOLING_MODE == 'crop':
        # Fix: was `self.grid_size` — `self` does not exist in this free
        # function; the grid size lives on the model.
        grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                   base_feat.size()[2:], model.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
            3).contiguous()
        pooled_feat = model.RCNN_roi_crop(base_feat,
                                          Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = model.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = model.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # Feed pooled features to the top (object) head: (B x N) x D.
    x_obj = model._head_to_tail(pooled_feat)

    obj_cls_score = model.RCNN_obj_cls_score(x_obj)
    # Explicit dim=1: scores are (B x N) x C; avoids the deprecated
    # implicit-dim softmax.
    obj_cls_prob = F.softmax(obj_cls_score, dim=1)
    bbox_pred = model.RCNN_bbox_pred(x_obj)

    # Attribute features/scores, optionally shared with the object head.
    if cfg.SHARE_OBJ_ATT_FEATURE:
        x_att = x_obj
    else:
        x_att = model._head_to_tail_att(pooled_feat)  # (B x N) x D

    att_cls_score = model.RCNN_att_cls_score(x_att)
    att_cls_prob = F.softmax(att_cls_score, dim=1)

    # Keep only the most promising rois before relation proposal.
    _, rois_pop_id = bbox_proposal_fast(obj_cls_prob.data.unsqueeze(0),
                                        att_cls_prob.data.unsqueeze(0),
                                        rois.data)
    rois_pop_id = rois_pop_id.view(-1)

    rois_pop = rois[:, rois_pop_id, :].data
    x_obj = x_obj[rois_pop_id]
    obj_cls_score = obj_cls_score[rois_pop_id]
    x_att = x_att[rois_pop_id]

    # Propose relations between the surviving rois.
    rel_feats = obj_cls_score.view(rois_pop.size(0), rois_pop.size(1),
                                   obj_cls_score.size(1))
    roi_rel_pairs, roi_pair_proposals, roi_rel_pairs_score, relpn_loss_cls = \
        model.RELPN_rpn(rois_pop, rel_feats, im_info.data, gt_boxes.data, num_boxes.data, False)

    # Drop all-zero (padding) relation pairs.
    valid = roi_rel_pairs.sum(2).view(-1).nonzero().view(-1)
    roi_rel_pairs = roi_rel_pairs[:, valid, :]
    roi_pair_proposals = roi_pair_proposals[:, valid, :]
    roi_rel_pairs_score = roi_rel_pairs_score[:, valid, :]

    # Subject/object roi indices for every proposed pair.
    roi_pair_proposals_v = roi_pair_proposals.view(-1, 2)
    ind_subject = roi_pair_proposals_v[:, 0]
    ind_object = roi_pair_proposals_v[:, 1]

    # Union box of each subject/object pair, pooled like a regular roi.
    rois_pred = combine_box_pairs(roi_rel_pairs.view(-1, 9))
    rois_pred = Variable(rois_pred)

    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(rois_pred.view(-1, 5),
                                   base_feat.size()[2:], model.grid_size)
        grid_yx = torch.stack(
            [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
            3).contiguous()
        pooled_pred_feat = model.RELPN_roi_crop(base_feat,
                                                Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            # Fix: was max-pooling `pooled_feat` (the object features) —
            # copy-paste bug; the relation features must be pooled here.
            pooled_pred_feat = F.max_pool2d(pooled_pred_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_pred_feat = model.RELPN_roi_align(base_feat,
                                                 rois_pred.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_pred_feat = model.RELPN_roi_pool(base_feat,
                                                rois_pred.view(-1, 5))

    # Relation (predicate) features.
    x_pred = model._head_to_tail_rel(pooled_pred_feat)

    if cfg.GCN_ON_FEATS and cfg.GCN_LAYERS > 0:

        if cfg.GCN_HAS_ATTENTION:
            # Attention over relation edges from subject/object class scores.
            x_sobj = obj_cls_score[ind_subject]
            x_oobj = obj_cls_score[ind_object]
            attend_score = model.GRCNN_gcn_att1(x_sobj, x_oobj)  # N_rel x 1
            attend_score = attend_score.view(1, x_pred.size(0))
        else:
            attend_score = Variable(
                x_pred.data.new(1, x_pred.size(0)).fill_(1))

        # Build the initial adjacency maps: obj<->att is the identity
        # (each object linked to its own attributes); obj<->obj connects
        # every roi pair within an image (minus self-loops); obj<->rel
        # scatters the attention score onto subject/object edges.
        size_per_batch = int(x_obj.size(0))

        map_obj_att = torch.eye(x_obj.size(0)).type_as(x_obj.data)

        if cfg.MUTE_ATTRIBUTES:
            map_obj_att.zero_()
            x_att = x_att.detach()

        map_obj_att = Variable(map_obj_att)

        map_obj_obj = x_obj.data.new(x_obj.size(0), x_obj.size(0)).fill_(0.0)
        eye_mat = torch.eye(size_per_batch).type_as(x_obj.data)
        for i in range(batch_size):
            blk = slice(i * size_per_batch, (i + 1) * size_per_batch)
            map_obj_obj[blk, blk].fill_(1.0)
            # Remove self-loops within each image's block.
            map_obj_obj[blk, blk] = map_obj_obj[blk, blk] - eye_mat

        map_obj_obj = Variable(map_obj_obj)

        map_sobj_rel = Variable(
            x_obj.data.new(x_obj.size(0), x_pred.size(0)).zero_())
        map_sobj_rel.scatter_(
            0, Variable(ind_subject.contiguous().view(1, x_pred.size(0))),
            attend_score)
        map_oobj_rel = Variable(
            x_obj.data.new(x_obj.size(0), x_pred.size(0)).zero_())
        map_oobj_rel.scatter_(
            0, Variable(ind_object.contiguous().view(1, x_pred.size(0))),
            attend_score)
        map_obj_rel = torch.stack((map_sobj_rel, map_oobj_rel), 2)

        if cfg.MUTE_RELATIONS:
            map_obj_rel.data.zero_()
            x_pred = x_pred.detach()

        for _ in range(cfg.GCN_LAYERS):
            # Propagate features along the graph, then re-score.
            x_obj, x_att, x_pred = model.GRCNN_gcn_feat(
                x_obj, x_att, x_pred, map_obj_att, map_obj_obj, map_obj_rel)

            obj_cls_score = model.RCNN_obj_cls_score(x_obj)
            obj_cls_prob = F.softmax(obj_cls_score, dim=1)

            att_cls_score = model.RCNN_att_cls_score(x_att)
            att_cls_prob = F.softmax(att_cls_score, dim=1)

            rel_cls_score = model.RCNN_rel_cls_score(x_pred)
            rel_cls_prob = F.softmax(rel_cls_score, dim=1)

    # Object features: refined feature concatenated with its box coords.
    obj_feat = torch.cat((x_obj.data, rois_pop[0, :, 1:].contiguous()),
                         1).cpu()

    att_feat = x_att.data.cpu()

    # Suppress the background relation class and keep the top pairs.
    rel_cls_prob[:, 0] = 0
    val, ind = rel_cls_prob.max(1)

    _, order_rel = torch.sort(val, 0, True)

    rel_feat = torch.zeros(MAX_REL_PAIRS, x_pred.size(1) + 2)
    rel_pop_id = order_rel[:MAX_REL_PAIRS].data

    # Each row: (subject idx, object idx, relation feature).
    all_feat = torch.cat((roi_pair_proposals_v[rel_pop_id].float().cpu(),
                          x_pred[rel_pop_id].data.cpu()), 1)

    if all_feat.size(0) < MAX_REL_PAIRS:
        rel_feat[:all_feat.size(0), :] = all_feat  # zero-pad to fixed size
    else:
        rel_feat = all_feat

    if vis:
        vis_dets(im_data, im_info, rois_pop,
                 bbox_pred.unsqueeze(0)[:, rois_pop_id, :].data,
                 obj_cls_prob.unsqueeze(0).data, imdb_vg)

    # Global (whole-feature-map) representations.
    global_obj_feat = model._head_to_tail(base_feat)
    global_att_feat = model._head_to_tail_att(base_feat)

    return global_obj_feat.data.cpu().numpy(), global_att_feat.data.cpu(
    ).numpy(), obj_feat.numpy(), att_feat.numpy(), rel_feat.numpy()