Example no. 1
    def forward(self, x, rois, roi_indices):
        roi_indices = at.totensor(roi_indices).float()
        rois = at.totensor(rois).float()
        indices_and_rois = t.cat([roi_indices[:, None], rois], dim=1)
        # reorder the columns from (index, ymin, xmin, ymax, xmax)
        # to (index, xmin, ymin, xmax, ymax): yx -> xy
        xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
        indices_and_rois = xy_indices_and_rois.contiguous()
        pool = self.roi(x, indices_and_rois)
        # flatten the pooled features to (R, C * 7 * 7)
        pool = pool.view(pool.size(0), -1)
        fc7 = self.classifier(pool)
        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.scores(fc7)
        return roi_cls_locs, roi_scores
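The [0, 2, 1, 4, 3] index permutes the box columns from (index, ymin, xmin, ymax, xmax) into the (index, xmin, ymin, xmax, ymax) order expected by the RoI pooling layer. A minimal sketch of the reordering, with made-up values:

import torch

# one RoI: batch index 0, box (ymin=10, xmin=20, ymax=50, xmax=60)
indices_and_rois = torch.tensor([[0., 10., 20., 50., 60.]])
print(indices_and_rois[:, [0, 2, 1, 4, 3]])
# tensor([[ 0., 20., 10., 60., 50.]])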
    def predict(self, imgs, sizes=None, visualize=False):
        """
        Used when computing mAP.
        :param imgs: a batch of images
        :param sizes: the input size of every image in the batch
        :return: the bbox coordinates, classes and class probabilities of all
            images in the batch; all three are lists of numpy arrays
        """

        self.eval()
        if visualize:
            self.use_preset('visualize')
        prepared_imgs = imgs

        bboxes = list()
        labels = list()
        scores = list()

        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois = self(img, scale=scale)
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            mean = torch.Tensor(self.loc_normalize_mean).repeat(self.n_class)[None]
            std = torch.Tensor(self.loc_normalize_std).repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            # As this shows, roi_cls_loc is a location offset used to compute a more accurate position; it is not the position itself
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clamp the cls_bbox coordinates to the image boundary, between min and max
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)
            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        # return the predicted bboxes, labels and scores of the images
        return bboxes, labels, scores
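loc2bbox is not part of this listing; it decodes the predicted (dy, dx, dh, dw) offsets against the base boxes. A minimal sketch of the standard Faster R-CNN decoding it is assumed to implement, with boxes in (ymin, xmin, ymax, xmax) order:

import numpy as np

def loc2bbox(src_bbox, loc):
    # sizes and centers of the base boxes
    src_h = src_bbox[:, 2] - src_bbox[:, 0]
    src_w = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_h
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_w
    dy, dx, dh, dw = loc[:, 0], loc[:, 1], loc[:, 2], loc[:, 3]
    # shift the centers and rescale the sizes
    ctr_y = dy * src_h + src_ctr_y
    ctr_x = dx * src_w + src_ctr_x
    h = np.exp(dh) * src_h
    w = np.exp(dw) * src_w
    # back to corner coordinates
    dst_bbox = np.zeros(loc.shape, dtype=loc.dtype)
    dst_bbox[:, 0] = ctr_y - 0.5 * h
    dst_bbox[:, 1] = ctr_x - 0.5 * w
    dst_bbox[:, 2] = ctr_y + 0.5 * h
    dst_bbox[:, 3] = ctr_x + 0.5 * w
    return dst_bbox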
Example no. 3
    def forward(self, x):
        y = x.view(-1, x.size(1), x.size(2) * x.size(3))  # b x c x hw
        vladres = np.zeros([y.size(0), self.k, y.size(-1)])
        for i in range(y.size(0)):
            feature = tonumpy(y[i])
            c, hw = feature.shape
            # self.model is assumed to expose sklearn-KMeans-style
            # fit(), cluster_centers_ and labels_
            self.model.fit(feature)
            clusters = self.model.cluster_centers_
            labels = self.model.labels_
            for j in range(c):
                label = labels[j]
                # accumulate the residual of each descriptor against its center
                vladres[i, label, :] += feature[j] - clusters[label]
        vladres = totensor(vladres)
        return vladres
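The inner loop is the VLAD aggregation: each descriptor's residual to its assigned cluster center is summed per cluster. A self-contained sketch of the same computation for a single feature map, using sklearn's KMeans directly (the shapes and k are made up):

import numpy as np
from sklearn.cluster import KMeans

k = 4
feature = np.random.randn(64, 100)   # c = 64 descriptors of length hw = 100
km = KMeans(n_clusters=k, n_init=10).fit(feature)
vlad = np.zeros((k, feature.shape[1]))
for j, label in enumerate(km.labels_):
    vlad[label] += feature[j] - km.cluster_centers_[label]
print(vlad.shape)  # (4, 100)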
Example no. 4
    def forward(self, features, rois):
        rois = at.totensor(rois).float()
        roi_list = []
        for roi in rois:
            # This presumably scales each roi down onto the feature map.
            # E.g. with an original image of (300, 500) and roi (34, 76, 82, 95),
            # the roi is a box drawn on the original image that has to be
            # converted to a box on the feature map, say (37, 50): dividing the
            # roi by 16 gives its position there.
            # roi / 16 = (34 / 16, 76 / 16, 82 / 16, 95 / 16), rounded: (2, 4, 5, 5)
            # So the box on the feature map is (2, 4, 5, 5); these coordinates are
            # indices, i.e. the slices (2:5+1, 4:5+1) give the height and width.
            # Why the +1? Probably so that ymin == ymax cannot produce a
            # zero-length slice.
            roi_part = features[:, :, (roi[0] / self.spatial_scale).int(): (roi[2] / self.spatial_scale).int() + 1,
                       (roi[1] / self.spatial_scale).int(): (roi[3] / self.spatial_scale).int() + 1]
            roi_part = nn.AdaptiveMaxPool2d((7, 7))(roi_part)
            roi_list.append(roi_part)
        pool = torch.cat(roi_list)  # B,C,7,7
        pool = pool.reshape(pool.shape[0], -1)  # B,C*7*7
        return pool
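The mapping described in the comment is plain integer scaling; a minimal sketch of the arithmetic (values taken from the comment above):

roi = (34, 76, 82, 95)   # (ymin, xmin, ymax, xmax) on the original image
stride = 16              # downsampling factor of the feature extractor
fm_roi = tuple(v // stride for v in roi)
print(fm_roi)            # (2, 4, 5, 5) -> slices [2:6, 4:6] on the feature map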
Example no. 5
    def forward(self, x, rois):
        rois = at.totensor(rois).float()
        roi_list = []
        for roi in rois:
            # This presumably scales each roi down onto the feature map.
            # E.g. with an original image of (300, 500) and roi (34, 76, 82, 95),
            # the roi is a box drawn on the original image that has to be
            # converted to a box on the feature map, say (37, 50): dividing the
            # roi by 16 gives its position there.
            # roi / 16 = (34 / 16, 76 / 16, 82 / 16, 95 / 16), rounded: (2, 4, 5, 5)
            # So the box on the feature map is (2, 4, 5, 5); these coordinates are
            # indices, i.e. the slices (2:5+1, 4:5+1) give the height and width.
            # Why the +1? Probably so that ymin == ymax cannot produce a
            # zero-length slice.
            roi_part = x[:, :, (roi[0] * self.spatial_scale).int(): (roi[2] * self.spatial_scale).int() + 1,
                       (roi[1] * self.spatial_scale).int(): (roi[3] * self.spatial_scale).int() + 1]
            roi_part = nn.AdaptiveMaxPool2d((7, 7))(roi_part)
            roi_list.append(roi_part)
        pool = torch.cat(roi_list)  # B,C,7,7
        pool = pool.reshape(pool.shape[0], -1)  # B,C*7*7

        # pool(B, C*7*7)
        # pool = self.roi(x, rois)
        fc7 = self.classifier(pool)
        roi_cls_locs = self.cls_loc(fc7)
        roi_scores = self.cls_score(fc7)
        # return the roi refinement coefficients and the class scores
        return roi_cls_locs, roi_scores
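nn.AdaptiveMaxPool2d((7, 7)) is what turns the variable-sized crop into a fixed RoI feature: whatever the crop's spatial size, the output is always 7 x 7 per channel. A quick demonstration with made-up shapes:

import torch
import torch.nn as nn

pool = nn.AdaptiveMaxPool2d((7, 7))
crop_a = torch.randn(1, 512, 3, 11)   # a small, wide crop
crop_b = torch.randn(1, 512, 25, 9)   # a taller one
print(pool(crop_a).shape, pool(crop_b).shape)
# torch.Size([1, 512, 7, 7]) torch.Size([1, 512, 7, 7])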
Example no. 6
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))


                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)


                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
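scale_lr is not shown in this listing; a minimal sketch of what the method is assumed to do, namely multiply the learning rate of every optimizer parameter group by the decay factor:

def scale_lr(self, decay=0.1):
    # shrink the learning rate of every parameter group in place
    for param_group in self.optimizer.param_groups:
        param_group['lr'] *= decay
    return self.optimizer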
Example no. 7
    def forward(self, imgs, bboxes, labels, scale):
        # one image per call, together with its bboxes and labels
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)
        # compute the feature map
        features = self.faster_rcnn.extractor(imgs)
        # run the RPN: predicted locs and scores, the rois, and all anchors
        # rpn_locs: (N, H*W*A, 4)
        rpn_locs, rpn_scores, rois, anchor = self.faster_rcnn.rpn(
            features, img_size, scale)
        # bbox(R, 4) label(R, 1)
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois
        # bbox and label are the ground-truth bbox and label
        # roi holds the predicted boxes
        # returns the sampled rois, the location offsets of each roi relative
        # to its matched ground-truth bbox, and the label assigned to each roi
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)

        # presumably returns the per-roi locations and scores produced by the fully connected head on the feature map
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi)

        # rpn loss
        # assign targets to the anchors: loc defaults to 0, with positive
        # samples getting the ground-truth offsets; label defaults to -1,
        # with positive samples set to 1
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)

        # rpn_loc(H * W * A, 4)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)
        # RPN classification loss, ignoring samples with label == -1
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label,
                                       ignore_index=-1)

        # ROI loss
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long(),
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc, gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)

        losses = {
            'rpn_loc_loss': rpn_loc_loss,
            'rpn_cls_loss': rpn_cls_loss,
            'roi_loc_loss': roi_loc_loss,
            'roi_cls_loss': roi_cls_loss
        }
        losses['total_loss'] = sum(losses.values())
        return losses
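_fast_rcnn_loc_loss is referenced but not defined in this listing. A sketch of the smooth L1 localization loss it is assumed to compute, restricted to positive samples and normalized by the number of non-ignored ones:

import torch

def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    sigma2 = sigma ** 2
    # only positive samples (label > 0) contribute to the localization loss
    in_weight = torch.zeros(gt_loc.shape)
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
    diff = in_weight * (pred_loc - gt_loc)
    abs_diff = diff.abs()
    # smooth L1: quadratic below 1 / sigma^2, linear above
    flag = (abs_diff < (1. / sigma2)).float()
    y = flag * (sigma2 / 2.) * (diff ** 2) + (1 - flag) * (abs_diff - 0.5 / sigma2)
    # normalize by the number of non-ignored samples (label >= 0)
    return y.sum() / (gt_label >= 0).sum().float()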
Example no. 8
    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.
        Here are notations used.
        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.
        Currently, only :math:`N=1` is supported.
        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.
        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: all zeros because only batch size 1 is supported for now
        sample_roi_index = t.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(features, sample_roi,
                                                       sample_roi_index)

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                           gt_rpn_label.data, self.rpn_sigma)

        # NOTE: default value of ignore_index is -100 ...
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(),
                                       ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False),
                        _gt_rpn_label.data.long())

        # ------------------ ROI losses (fast rcnn loss) -------------------#
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(), \
                              at.totensor(gt_roi_label).long()]
        gt_roi_label = at.totensor(gt_roi_label).long()
        gt_roi_loc = at.totensor(gt_roi_loc)

        roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                           gt_roi_label.data, self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

        self.roi_cm.add(at.totensor(roi_score, False),
                        gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)
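LossTuple is defined elsewhere in the trainer module; a sketch of its presumable definition as a namedtuple over the five losses returned here:

from collections import namedtuple

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss',
                        'rpn_cls_loss',
                        'roi_loc_loss',
                        'roi_cls_loss',
                        'total_loss'])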