Example No. 1
    def forward(self, x, rois, roi_indices):
        """Forward the chain.

        We assume that there are :math:`N` batches.

        Args:
            x (Variable): 4D image variable.
            rois (Tensor): A bounding box array containing coordinates of
                proposal boxes.  This is a concatenation of bounding box
                arrays from multiple images in the batch.
                Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
                RoIs from the :math:`i` th image,
                :math:`R' = \\sum _{i=1} ^ N R_i`.
            roi_indices (Tensor): An array containing indices of the images
                to which the bounding boxes correspond. Its shape is :math:`(R',)`.

        """
        # in case roi_indices is an ndarray
        roi_indices = at.to_tensor(roi_indices).float()
        rois = at.to_tensor(rois).float()
        indices_and_rois = t.cat([roi_indices[:, None], rois], dim=1)
        # NOTE: boxes are stored as (y_min, x_min, y_max, x_max); the RoI layer
        # expects (index, x_min, y_min, x_max, y_max), so reorder yx -> xy
        xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
        indices_and_rois = xy_indices_and_rois.contiguous()

        pool = self.roi(x, indices_and_rois)
        pool = pool.view(pool.size(0), -1)
        fc7 = self.classifier(pool)
        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)
        return roi_cls_locs, roi_scores
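A minimal usage sketch (hypothetical: `head` stands for an instance of this RoI head, and the shapes follow the docstring):

import torch

x = torch.randn(1, 512, 37, 50)           # feature map for one image
rois = torch.tensor([[ 64.,  64., 192., 256.],   # (y_min, x_min, y_max, x_max)
                     [128., 320., 448., 576.]])
roi_indices = torch.zeros(2)              # both RoIs belong to image 0
roi_cls_locs, roi_scores = head(x, rois, roi_indices)
# roi_cls_locs: (2, n_class * 4), roi_scores: (2, n_class)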
Example No. 2
    def train_forward_net(self, img_names, imgs, bboxes, labels, indexs, gt_relations, scale):
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        # Since batch size is one, convert variables to singular form
        bboxes = bboxes[0]
        gt_relations = gt_relations[0]
        _, _, H, W = imgs.shape  # imgs has shape (N, C, H, W)

        feature = self.net.extractor(imgs)
        combined_features, rel_flip = get_combined_feature(feature, bboxes, use_spatial_feature=self.conf.use_spatial_feature)
        relation_scores = []
        for combined_feature in combined_features:
            relation_score = self.net(*combined_feature)
            relation_scores.append(relation_score)
        relation_scores = torch.stack(relation_scores, dim=1).squeeze(0)

        ## flip the relation label when the object pair was swapped
        for i, flip in enumerate(rel_flip):
            if flip:
                # labels 1 and 2 encode the two directions of the relation
                if gt_relations[i] == 1:
                    gt_relations[i] = 2
                elif gt_relations[i] == 2:
                    gt_relations[i] = 1
        gt_relations = at.to_tensor(gt_relations).long().cuda()
        #self.rel_cm.add(relation_scores, gt_relations.data)
        # CrossEntropyLoss expects raw scores and class-index targets (not one-hot)
        relation_loss = nn.CrossEntropyLoss()(relation_scores, gt_relations)
        return LossTuple(total_loss=relation_loss)
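As the comment above notes, nn.CrossEntropyLoss takes raw scores of shape (N, C) and class-index targets of shape (N,), not one-hot vectors. A quick sketch with hypothetical values:

import torch
import torch.nn as nn

scores = torch.randn(4, 3)            # 4 object pairs, 3 relation classes (raw logits)
targets = torch.tensor([0, 2, 1, 1])  # class indices, not one-hot
loss = nn.CrossEntropyLoss()(scores, targets)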
Example No. 3
def _smooth_l1_loss(x, t, in_weight, sigma):
    sigma2 = sigma**2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()
    # quadratic branch where |diff| < 1 / sigma^2, linear branch elsewhere
    flag = (abs_diff.data < (1. / sigma2)).float()
    flag = at.to_tensor(flag)
    y = (flag * (sigma2 / 2.) * (diff**2) + (1 - flag) *
         (abs_diff - 0.5 / sigma2))
    return y.sum()
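Per element, with d = in_weight * (x - t), this computes 0.5 * sigma^2 * d^2 when |d| < 1/sigma^2 and |d| - 0.5/sigma^2 otherwise. A minimal NumPy reference (hypothetical values, sigma = 1):

import numpy as np

def smooth_l1_ref(d, sigma=1.0):
    # element-wise reference matching _smooth_l1_loss above
    s2 = sigma ** 2
    d = np.abs(d)
    return np.where(d < 1.0 / s2, 0.5 * s2 * d ** 2, d - 0.5 / s2)

print(smooth_l1_ref(np.array([0.5, 2.0])))  # [0.125 1.5]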
Example No. 4
def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    in_weight = torch.zeros(gt_loc.shape).cuda()
    # Localization loss is calculated only for positive rois.
    # NOTE: unlike the original implementation, we don't need inside_weight
    # and outside_weight; they can be computed from gt_label
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight).cuda()] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, at.to_tensor(in_weight),
                               sigma)
    # Normalize by the total number of negative and positive RoIs.
    loc_loss /= (gt_label >= 0).sum().to(
        torch.float)  # ignore gt_label==-1 for rpn_loss
    return loc_loss
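A minimal sketch of the masking behavior (hypothetical values; assumes a CUDA device, since the function hard-codes .cuda()):

import torch

pred_loc = torch.zeros(3, 4).cuda()
gt_loc = torch.ones(3, 4).cuda()
gt_label = torch.tensor([-1, 0, 1]).cuda()  # -1 ignored, 0 negative, 1 positive
# Only the positive RoI contributes: 4 * (1 - 0.5) = 2, normalized by the
# two RoIs with label >= 0  ->  1.0
loss = _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma=1.)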
Example No. 5
    def forward(self, x, rois, roi_indices, if_color=False):
        """Forward the chain.
    We assume that there are :math:`N` batches.

    Args:
        x (Variable): 4D image variable.
        rois (Tensor): A bounding box array containing coordinates of
            proposal boxes.  This is a concatenation of bounding box
            arrays from multiple images in the batch.
            Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
            RoIs from the :math:`i` th image,
            :math:`R' = \\sum _{i=1} ^ N R_i`.
        roi_indices (Tensor): An array containing indices of images to
            which bounding boxes correspond to. Its shape is :math:`(R',)`.

    """
        # in case roi_indices is an ndarray
        roi_indices = at.to_tensor(roi_indices).float()
        rois = at.to_tensor(rois).float()
        indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
        # NOTE: reorder (y, x) -> (x, y) for the RoI layer
        xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
        indices_and_rois = xy_indices_and_rois.contiguous()

        #print("shape of x: {0}".format(x.shape))
        #pool = self.roi_pool(x, indices_and_rois.view(-1, 5))
        #print("shape of pool: {0}".format(pool.shape)) #128,1024,7,7
        #print(pool.shape)
        pool = self.roi(x, indices_and_rois)
        #pool = pool.view(pool.size(0), -1)  # disabled when layer4 is used as the classifier
        # global average pool over the spatial grid (used when layer4 is the classifier)
        fc7 = self.classifier(pool).mean(3).mean(2)
        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.score(fc7)
        return roi_cls_locs, roi_scores
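The .mean(3).mean(2) call averages over the two spatial dimensions, i.e. global average pooling. A quick equivalence check (hypothetical shapes):

import torch

feat = torch.randn(128, 2048, 7, 7)  # e.g. per-RoI layer4 output
gap = feat.mean(3).mean(2)           # average over W, then H -> (128, 2048)
assert torch.allclose(gap, torch.nn.functional.adaptive_avg_pool2d(feat, 1).flatten(1))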
Example No. 6
    def predict(self, img, scale, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            img (Tensor): Image tensor of shape :math:`(1, C, H, W)`.
            scale (Tensor): Scale factor that was used to resize the image.
        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
        else:
            self.use_preset('evaluate')
        with t.no_grad():
            # size = [img.size[1], img.size[0]]  # H,W
            # img = at.to_tensor(self.pred_transform(img)).float()[None]  # add batch dimension
            # scale = img.size(3) / size[1]  # w_new/w_old
            # # recover original size
            scale = float(scale.numpy()[0])
            size = np.round([img.size(2) / scale, img.size(3) / scale])
            img = img.float().cuda()

            roi_name_locs, roi_name_scores, roi_color_locs, roi_color_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_name_scores = roi_name_scores.data
            roi_name_locs = roi_name_locs.data
            # color
            roi_color_scores = roi_color_scores.data
            roi_color_locs = roi_color_locs.data

            roi_org = at.to_tensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.head_name.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.head_name.n_class)[None]

            roi_name_locs = (roi_name_locs * std + mean)
            roi_name_locs = roi_name_locs.view(-1, self.head_name.n_class, 4)
            roi = roi_org.view(-1, 1, 4).expand_as(roi_name_locs)
            name_bbox = loc2bbox(at.to_np(roi).reshape((-1, 4)),
                                at.to_np(roi_name_locs).reshape((-1, 4)))
            name_bbox = at.to_tensor(name_bbox)
            name_bbox = name_bbox.view(-1, self.head_name.n_class * 4)
            # clip bounding box
            name_bbox[:, 0::2] = (name_bbox[:, 0::2]).clamp(min=0, max=size[0])
            name_bbox[:, 1::2] = (name_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.to_np(F.softmax(at.to_tensor(roi_name_scores), dim=1))
            # print("prob : {0}".format(prob.shape))#(300,4)
            raw_name_bboxes_ = at.to_np(name_bbox)
            raw_name_probs_ = at.to_np(prob)

            ##color
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.head_color.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.head_color.n_class)[None]

            roi_color_locs = (roi_color_locs * std + mean)
            roi_color_locs = roi_color_locs.view(-1, self.head_color.n_class, 4)
            roi = roi_org.view(-1, 1, 4).expand_as(roi_color_locs)
            color_bbox = loc2bbox(at.to_np(roi).reshape((-1, 4)),
                                 at.to_np(roi_color_locs).reshape((-1, 4)))
            color_bbox = at.to_tensor(color_bbox)
            color_bbox = color_bbox.view(-1, self.head_color.n_class * 4)
            # clip bounding box
            color_bbox[:, 0::2] = (color_bbox[:, 0::2]).clamp(min=0, max=size[0])
            color_bbox[:, 1::2] = (color_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.to_np(F.softmax(at.to_tensor(roi_color_scores), dim=1))
            # print("prob : {0}".format(prob.shape))#(300,4)
            raw_color_bboxes_ = at.to_np(color_bbox)
            raw_color_probs_ = at.to_np(prob)

            bboxes_, names_, name_scores_, colors_, color_scores_ \
                = self._suppress(raw_name_bboxes_, raw_name_probs_, raw_color_bboxes_, raw_color_probs_, prob_thresh=0.7)
            """
            print('name')
            print(names_, name_scores_)
            print('color')
            print(colors_, color_scores_)
            set_trace()
            """
            if raw_name_bboxes_.shape[0] != raw_color_bboxes_.shape[0]:
                raise AssertionError(
                    "raw bbox counts differ: name {0}, color {1}".format(
                        raw_name_bboxes_.shape, raw_color_bboxes_.shape))
            if names_.shape[0] != colors_.shape[0]:
                raise AssertionError(
                    "suppressed counts differ: names {0}, colors {1}, bboxes {2}".format(
                        names_.shape, colors_.shape, bboxes_.shape))

        self.use_preset('evaluate')
        self.train()
        return bboxes_, names_, name_scores_, colors_, color_scores_
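loc2bbox is used above but not shown; a sketch of the standard Faster R-CNN decoding it presumably implements, assuming (y_min, x_min, y_max, x_max) boxes and (dy, dx, dh, dw) offsets:

import numpy as np

def loc2bbox_sketch(src_bbox, loc):
    # decode (dy, dx, dh, dw) offsets relative to source boxes
    h = src_bbox[:, 2] - src_bbox[:, 0]
    w = src_bbox[:, 3] - src_bbox[:, 1]
    ctr_y = src_bbox[:, 0] + 0.5 * h
    ctr_x = src_bbox[:, 1] + 0.5 * w
    new_ctr_y = loc[:, 0] * h + ctr_y
    new_ctr_x = loc[:, 1] * w + ctr_x
    new_h = np.exp(loc[:, 2]) * h
    new_w = np.exp(loc[:, 3]) * w
    dst = np.zeros_like(loc)
    dst[:, 0] = new_ctr_y - 0.5 * new_h
    dst[:, 1] = new_ctr_x - 0.5 * new_w
    dst[:, 2] = new_ctr_y + 0.5 * new_h
    dst[:, 3] = new_ctr_x + 0.5 * new_w
    return dst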
Example No. 7
    def train_forward_net(self, img_names, imgs, indexs, bboxes, names, shapes,
                          colors, _, scale):

        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)
        #print("img_name {0}".format(img_name))
        #vis_image(imgs.cpu()[0])
        #plt.show()
        features = self.net.extractor(imgs)  # e.g. VGG16 features of shape (1, 512, 37, 50)
        rpn_locs, rpn_scores, rois, _, anchor = self.net.rpn(
            features, img_size, scale)
        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        name = names[0]
        color = colors[0]

        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs for training the head (classification network) and forward them

        sample_roi, gt_roi_loc, gt_roi_name, gt_roi_color = self.proposal_target(
            roi, at.to_np(bbox), at.to_np(name), at.to_np(color))
        # NOTE: all zeros because only batch size 1 is supported
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_name_loc, roi_name_score = self.net.head_name(
            features, sample_roi, sample_roi_index)

        _, roi_color_score = self.net.head_color(features, sample_roi,
                                                 sample_roi_index)

        # ------------------ RPN losses -------------------#
        # Target for RPN => Anchor Target
        gt_rpn_loc, gt_rpn_label = self.anchor_target(at.to_np(bbox), anchor,
                                                      img_size)

        gt_rpn_label = at.to_tensor(gt_rpn_label).long()
        gt_rpn_loc = at.to_tensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data,
                                           self.conf.rpn_sigma)

        # NOTE: cross_entropy's default ignore_index is -100, so pass -1 explicitly
        rpn_cls_loss = torch.nn.functional.cross_entropy(rpn_score,
                                                         gt_rpn_label.cuda(),
                                                         ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.to_np(rpn_score)[at.to_np(gt_rpn_label) > -1]
        self.rpn_cm.add(at.to_tensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        ##name##
        n_sample = roi_name_loc.shape[0]
        roi_name_loc = roi_name_loc.view(n_sample, -1, 4)
        # select the loc predicted for each RoI's ground-truth class
        # (gt_roi_name holds class indices, not one-hot vectors)
        name_roi_loc = roi_name_loc[torch.arange(0, n_sample).long().cuda(),
                                    at.to_tensor(gt_roi_name).long()]
        gt_roi_name = at.to_tensor(gt_roi_name).long()
        gt_roi_loc = at.to_tensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(name_roi_loc.contiguous(),
                                           gt_roi_loc, gt_roi_name.data,
                                           self.conf.roi_sigma)
        roi_name_loss = nn.CrossEntropyLoss()(roi_name_score,
                                              gt_roi_name.cuda())

        self.roi_name_cm.add(at.to_tensor(roi_name_score, False),
                             gt_roi_name.data.long())

        ##color##
        gt_roi_color = at.to_tensor(gt_roi_color).long()
        roi_color_loss = nn.CrossEntropyLoss()(roi_color_score,
                                               gt_roi_color.cuda())

        self.roi_color_cm.add(at.to_tensor(roi_color_score, False),
                              gt_roi_color.data.long())

        ## sum up all losses ##
        losses = [
            rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_name_loss,
            roi_color_loss
        ]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)
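LossTuple is not defined in this example; in code derived from simple-faster-rcnn-pytorch it is typically a namedtuple over the individual losses plus their sum. A hypothetical definition consistent with LossTuple(*losses) above:

from collections import namedtuple

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss', 'rpn_cls_loss', 'roi_loc_loss',
                        'roi_name_loss', 'roi_color_loss', 'total_loss'])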
Example No. 8
def get_combined_feature(feature,
                         bboxes,
                         use_spatial_feature=False,
                         roi_size=7,
                         spatial_scale=1 / 16,
                         flip=True):
    """
    :param feature: feature has passed extractor, shape[1,512,37,50]
    :param bboxes: shape[N, 4], N is num of object
    :return:
        combined_features: list of all combined feature(as same order as gt relation), is a list contains list of
        features in following type: [obj_a, obj_b, rel] or [obj_a, obj_b, rel, spatial_feature]
        rel_flip: list of rel that has been flipped
    """
    roi_pooling = RoIPooling2D(roi_size, roi_size,
                               spatial_scale)  # spatial scale is not important
    num_of_bbox = bboxes.shape[0]
    ## scale bboxes into feature-map coordinates
    ##TODO
    scale_x_imgtofeature, scale_y_imgtofeature = float(
        feature.size(3) / 1000), float(feature.size(2) / 600)  # assumes a 600x1000 (H x W) input image
    bboxes_f = np.zeros(
        bboxes.shape, dtype=int
    )  # bboxes_f holds the bboxes resized to the feature-map scale (37, 50)
    for (bbox, bbox_f) in zip(bboxes, bboxes_f):
        bbox_f[0] = int(bbox[0] * scale_y_imgtofeature)
        bbox_f[2] = int(bbox[2] * scale_y_imgtofeature)
        bbox_f[1] = int(bbox[1] * scale_x_imgtofeature)
        bbox_f[3] = int(bbox[3] * scale_x_imgtofeature)
        assert bbox_f[0] in range(feature.size(2)+1) and bbox_f[2] in range(feature.size(2)+1) \
               and bbox_f[1] in range(feature.size(3)+1) and bbox_f[3] in range(feature.size(3)+1), "bbox:{0}  {1}".format(bbox, feature.shape)

    ##start forward
    rel_flip = []
    combined_features = []
    for i_obj_a in range(num_of_bbox):
        for i_obj_b in range(i_obj_a + 1, num_of_bbox):
            bbox_f_a = bboxes_f[i_obj_a]  # in (ymin, xmin, ymax, xmax)
            bbox_f_b = bboxes_f[i_obj_b]
            rel_flip.append(False)
            if rd.random() > 0.5 and flip:  # randomly swap obj1 and obj2
                bbox_f_a, bbox_f_b = bbox_f_b, bbox_f_a
                rel_flip[-1] = True
            ## get union coordinates
            union_cord = np.zeros(4)
            union_cord[0] = min(bbox_f_a[0], bbox_f_b[0])
            union_cord[1] = min(bbox_f_a[1], bbox_f_b[1])
            union_cord[2] = max(bbox_f_a[2], bbox_f_b[2])
            union_cord[3] = max(bbox_f_a[3], bbox_f_b[3])

            ##spatial feature
            if use_spatial_feature:
                ##dual spatial mask
                dual_channel_feature = torch.zeros(2, 37, 50)  # hard-coded feature-map size; needs to be modified
                for ii, bbox in enumerate([bbox_f_a, bbox_f_b]):  # obj_a first
                    dual_channel_feature[ii, bbox[0]:bbox[2],
                                         bbox[1]:bbox[3]] = 1
            ##combine features
            rois = np.stack([bbox_f_a, bbox_f_b, union_cord], axis=0)
            roi_indices = np.zeros(3)
            roi_indices = at.to_tensor(roi_indices).float()
            rois = at.to_tensor(rois).float()
            indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
            xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
            indices_and_rois = xy_indices_and_rois.contiguous()
            combined_feature_arr = roi_pooling(
                feature, indices_and_rois)  # in obj_a, obj_b, union order
            combined_feature = []
            for idx in range(combined_feature_arr.size(0)):
                combined_feature.append(
                    torch.unsqueeze(combined_feature_arr[idx],
                                    0))  # add a batch dimension
                assert combined_feature[-1].shape == (1, 512, 7, 7)
            combined_features.append(combined_feature)
    return combined_features, rel_flip
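A minimal usage sketch (hypothetical inputs, matching the 600x1000 image assumption above):

import numpy as np
import torch

feature = torch.randn(1, 512, 37, 50)          # extractor output for one image
bboxes = np.array([[100., 100., 300., 400.],   # (y_min, x_min, y_max, x_max)
                   [200., 500., 500., 900.]])  # in image coordinates

combined_features, rel_flip = get_combined_feature(feature, bboxes, flip=False)
# two objects -> one pair: [obj_a, obj_b, union], each of shape (1, 512, 7, 7)
assert len(combined_features) == 1 and len(combined_features[0]) == 3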