Example #1
0
    def forward(self):
        """Collect anchor-quality statistics for the next validation video.

        Every window of ``self._depth`` consecutive frames is summarised by the
        mean of columns 1:5 of its ground-truth rows, divided by 16
        (presumably the feature-map stride -- TODO confirm).  That averaged box
        is compared against ``self.anchors``.

        Returns:
            tuple: ``(r1, r2, is_last)`` where, per window,
            ``r1`` holds the best overlap any anchor reaches with the averaged
            ground-truth box, ``r2`` holds the column-wise maximum absolute
            ``bbox_transform`` target over the positive anchors, and
            ``is_last`` is passed through from
            ``self.dataset.next_val_video()``.
        """
        clips, gt_bboxes, gt_label, vid_name, is_last = \
            self.dataset.next_val_video()

        num_frames = clips.shape[0]
        r1 = []
        r2 = []
        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy; np.float64 is the identical dtype.
        anchors = np.ascontiguousarray(self.anchors, dtype=np.float64)
        # range() instead of the Python 2 only xrange().
        for i in range(num_frames - self._depth + 1):
            curr_gt = np.mean(gt_bboxes[i:i + self._depth, 1:5], axis=0) / 16
            curr_gt = np.expand_dims(curr_gt, axis=0)
            overlaps = bbox_overlaps(
                anchors,
                np.ascontiguousarray(curr_gt, dtype=np.float64))
            max_overlaps = overlaps.max(axis=1)
            gt_argmax_overlaps = overlaps.argmax(axis=0)
            gt_max_overlaps = overlaps.max(axis=0)

            # Positive anchors: overlap above 0.6, plus the best-matching
            # anchor so that there is always at least one positive.
            positive = max_overlaps > 0.6
            positive[gt_argmax_overlaps] = True
            pos_box = self.anchors[positive]
            diff = bbox_transform(pos_box, curr_gt)
            r1.append(gt_max_overlaps)
            r2.append(np.abs(diff).max(axis=0))

        return r1, r2, is_last
Example #2
0
def _compute_targets(ex_rois, gt_rois):
    """Return float32 box-regression targets from ``ex_rois`` to ``gt_rois``.

    ``ex_rois`` must have 4 columns and ``gt_rois`` 5 columns, with the same
    number of rows.  Only the first four columns of ``gt_rois`` are handed to
    ``bbox_transform``.
    """
    num_ex = ex_rois.shape[0]
    num_gt = gt_rois.shape[0]
    assert num_ex == num_gt
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    gt_coords = gt_rois[:, :4]
    targets = bbox_transform(ex_rois, gt_coords)
    return targets.astype(np.float32, copy=False)
def _compute_targets(ex_rois, gt_rois):
    """Build the regression targets that map each example RoI to its GT RoI.

    Both inputs need the same row count; ``ex_rois`` is 4 columns wide and
    ``gt_rois`` 5 columns wide, of which the fifth column is dropped before
    calling ``bbox_transform``.  The result is cast to float32 without
    copying when it already has that dtype.
    """
    rows_match = ex_rois.shape[0] == gt_rois.shape[0]
    assert rows_match
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    raw_targets = bbox_transform(ex_rois, gt_rois[:, 0:4])
    return raw_targets.astype(np.float32, copy=False)
def _map(label, target, gt_bbox, l, n):
    """Scatter per-sample box targets into class-specific 4-column slots.

    Args:
        label: sequence of class ids; ``label - 1`` selects the slot, so the
            ids are treated as 1-based.
        target: boxes passed as the first argument of ``bbox_transform``.
        gt_bbox: boxes passed as the second argument of ``bbox_transform``.
        l: number of 4-column slots per row (output width is ``l * 4``).
        n: number of output rows.

    Returns:
        tuple: ``(r_diff, mask)``, both of shape ``(n, l * 4)``.  Row ``i`` of
        ``r_diff`` holds ``diff[i]`` in the slot of its label and zeros
        elsewhere; ``mask`` is 1 in exactly those four columns.
    """
    diff = bbox_transform(target, gt_bbox)
    r_diff = np.zeros((n, l * 4))
    mask = np.zeros((n, l * 4))
    # enumerate() replaces the Python 2 only ``xrange(len(label))`` loop.
    for row, cls in enumerate(label):
        start = int(cls - 1) * 4
        r_diff[row, start:start + 4] = diff[row]
        mask[row, start:start + 4] = 1
    return r_diff, mask
    def _compute_target(self, ex_rois, gt_rois):
        """Return normalised box-regression targets for matched RoI pairs.

        ``ex_rois`` and ``gt_rois`` must have the same first dimension and 4
        columns each.  The raw ``bbox_transform`` output is shifted by
        ``self.BBOX_NORMALIZE_MEANS`` and scaled by
        ``self.BBOX_NORMALIZE_STDS``, both expanded to the target shape.
        """
        assert ex_rois.size(0) == gt_rois.size(0)
        assert ex_rois.size(1) == 4
        assert gt_rois.size(1) == 4

        raw = bbox_transform(ex_rois, gt_rois)
        means = self.BBOX_NORMALIZE_MEANS.expand_as(raw)
        stds = self.BBOX_NORMALIZE_STDS.expand_as(raw)
        return (raw - means) / stds
Example #6
0
    def compute_targets(self, ex_rois, gt_rois, query_label):
        """Return ``[label, targets...]`` rows as a float32 array.

        Both ``ex_rois`` and ``gt_rois`` must be 4 columns wide.  When
        ``cfg.BBOX_NORMALIZE_TARGETS_PRECOMPUTED`` is set, the targets are
        normalised with the configured means and standard deviations.
        ``query_label`` becomes the first column of the result.
        """
        assert ex_rois.shape[1] == 4
        assert gt_rois.shape[1] == 4

        deltas = bbox_transform(ex_rois, gt_rois)
        if cfg.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optional normalisation by precomputed statistics.
            means = np.array(cfg.BBOX_NORMALIZE_MEANS)
            stds = np.array(cfg.BBOX_NORMALIZE_STDS)
            deltas = (deltas - means) / stds

        label_column = query_label[:, np.newaxis]
        stacked = np.hstack((label_column, deltas))
        return stacked.astype(np.float32, copy=False)
Example #7
0
    def forward(self, bottom, top):
        """Load one batch and write clip, label and box targets into ``top``.

        Outputs:
            top[0]: clips with axes reordered to (0, 4, 1, 2, 3).
            top[1]: per-anchor labels, one row per frame.
            top[2]: ``bbox_transform`` targets for anchors whose overlap with
                the frame's ground-truth box exceeds 0.6, zeros elsewhere.
            top[3]: the label row repeated four times (regression mask).

        ``bottom`` is not read.  Ground-truth boxes are divided by 16
        (presumably the feature-map stride -- TODO confirm).
        """
        clips, labels, tmp_bboxes, box_idx = \
            self.dataset.next_batch(self._batch_size, self._depth)
        batch_clip = clips.transpose((0, 4, 1, 2, 3))

        num_anchors = (self._anchor_dims[0] * self._anchor_dims[1] *
                       self._anchor_dims[2])
        num_rows = self._batch_size * self._depth
        # NOTE(review): only rows 0..depth-1 are filled below and only
        # tmp_bboxes[0] is used, so for batch_size > 1 the remaining rows keep
        # whatever np.empty left in them -- confirm batch_size is always 1.
        batch_labels = np.empty((num_rows, 1, num_anchors))
        batch_diff = np.empty((num_rows, 4, num_anchors))
        batch_mask = np.empty((num_rows, 4, num_anchors))

        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy; np.float64 is the identical dtype.
        anchors = np.ascontiguousarray(self.anchors, dtype=np.float64)

        # range() instead of the Python 2 only xrange().
        for i in range(self._depth):
            box = tmp_bboxes[0, :, :]
            gt_bboxes = np.expand_dims((box[i] / 16), axis=0)

            overlaps = bbox_overlaps(
                anchors,
                np.ascontiguousarray(gt_bboxes, dtype=np.float64))
            max_overlaps = overlaps.max(axis=1)
            gt_argmax_overlaps = overlaps.argmax(axis=0)

            # -1 = undecided, 0 = background, 1 = foreground.  The anchor that
            # best matches the ground truth is always foreground.
            curr_labels = np.ones(num_anchors) * (-1)
            curr_labels[self.valid_idx[max_overlaps < 0.5]] = 0
            curr_labels[self.valid_idx[max_overlaps > 0.6]] = 1
            curr_labels[self.valid_idx[gt_argmax_overlaps]] = 1
            batch_labels[i, 0] = curr_labels.reshape(
                (self._anchor_dims[1], self._anchor_dims[2],
                 self._anchor_dims[0])).transpose((2, 0, 1)).reshape(-1)

            # Regression targets only for anchors above the 0.6 threshold; the
            # forced best-match anchor keeps a zero target unless it also
            # exceeds the threshold.
            pos_boxes = self.anchors[max_overlaps > 0.6]
            curr_diff = np.zeros((num_anchors, 4))
            curr_diff[self.valid_idx[max_overlaps > 0.6]] \
              = bbox_transform(pos_boxes, gt_bboxes)
            batch_diff[i] = curr_diff.reshape(
                (self._anchor_dims[1], self._anchor_dims[2],
                 self._anchor_dims[0], 4)).transpose((3, 2, 0, 1)).reshape(
                     (4, -1))

            # NOTE(review): batch_labels[i] is a view, so zeroing the mask
            # also rewrites the -1 labels in batch_labels to 0 before they are
            # written to top[1].  Behaviour kept as-is; confirm it is intended.
            curr_mask = batch_labels[i]
            curr_mask[curr_mask < 1] = 0
            batch_mask[i] = np.repeat(curr_mask, 4, axis=0)

        top[0].data[...] = batch_clip.astype(np.float32, copy=False)
        top[1].data[...] = batch_labels.astype(np.float32, copy=False)
        top[2].data[...] = batch_diff.astype(np.float32, copy=False)
        top[3].data[...] = batch_mask.astype(np.float32, copy=False)
Example #8
0
    def forward(self, bottom, top):
        """Load one batch and write sampled anchors with their targets to ``top``.

        For each of the ``self._depth`` frames, up to four foreground anchors
        and an equal number of background anchors are drawn at random.

        Outputs (``top[1]``..``top[5]`` are reshaped to fit the samples):
            top[0]: clips with axes reordered to (0, 4, 1, 2, 3).
            top[1]: sampled anchors prefixed with the frame index.
            top[2]: labels of the sampled anchors (1 foreground, 0 background).
            top[3]: ``bbox_transform`` targets for the foreground samples,
                zeros for background samples.
            top[4]: labels repeated over four columns (regression mask).
            top[5]: sampled anchors prefixed with a zero column.

        ``bottom`` is not read.  Ground-truth boxes are divided by 16
        (presumably the feature-map stride -- TODO confirm).
        """
        clips, labels, tmp_bboxes, box_idx = \
            self.dataset.next_batch(self._batch_size, self._depth)
        batch_clip = clips.transpose((0, 4, 1, 2, 3))
        batch_tois = np.empty((0, 5))
        batch_label = np.empty((0, 1))
        batch_diff = np.empty((0, 4))
        batch_mask = np.empty((0, 4))
        batch_toi2 = np.empty((0, 5))

        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy; np.float64 is the identical dtype.
        anchors = np.ascontiguousarray(self.anchors, dtype=np.float64)

        # range() instead of the Python 2 only xrange().
        for i in range(self._depth):
            box = tmp_bboxes[0, :, :]
            gt_bboxes = np.expand_dims((box[i] / 16), axis=0)

            overlaps = bbox_overlaps(
                anchors,
                np.ascontiguousarray(gt_bboxes, dtype=np.float64))
            max_overlaps = overlaps.max(axis=1)
            gt_argmax_overlaps = overlaps.argmax(axis=0)

            # -1 = undecided, 0 = background, 1 = foreground.  The anchor that
            # best matches the ground truth is always foreground.
            curr_labels = np.ones(self.anchors.shape[0]) * (-1)
            curr_labels[max_overlaps < 0.4] = 0
            curr_labels[max_overlaps >= 0.6] = 1
            curr_labels[gt_argmax_overlaps] = 1

            # Cap the foreground samples at four.  np.random.choice samples
            # with replacement by default, so duplicates are possible.
            fg_inds = np.where(curr_labels > 0)[0]
            num_fg = len(fg_inds)
            if num_fg > 4:
                fg_inds = np.random.choice(fg_inds, size=4)
                num_fg = 4
            # As many background samples as foreground ones.
            bg_inds = np.where(curr_labels == 0)[0]
            bg_inds = np.random.choice(bg_inds, size=num_fg)
            curr_inds = np.concatenate((fg_inds, bg_inds))

            num_samples = 2 * num_fg
            sampled_anchors = self.anchors[curr_inds]
            curr_i = np.ones((num_samples, 1)) * i
            curr_tois = np.concatenate((curr_i, sampled_anchors), axis=1)
            curr_toi2 = np.concatenate(
                (np.zeros((num_samples, 1)), sampled_anchors), axis=1)
            curr_l = np.expand_dims(curr_labels[curr_inds], axis=1)

            # Foreground samples come first, so their targets fill the head of
            # curr_diff; background rows stay zero.
            fg_diff = bbox_transform(self.anchors[fg_inds], gt_bboxes)
            curr_diff = np.zeros((num_samples, 4))
            curr_diff[0:num_fg] = fg_diff
            curr_mask = np.repeat(curr_l, 4, axis=1)

            batch_tois = np.concatenate((batch_tois, curr_tois), axis=0)
            batch_label = np.concatenate((batch_label, curr_l), axis=0)
            batch_diff = np.concatenate((batch_diff, curr_diff), axis=0)
            batch_mask = np.concatenate((batch_mask, curr_mask), axis=0)
            batch_toi2 = np.concatenate((batch_toi2, curr_toi2), axis=0)

        # The number of samples varies per call, so resize the blobs first.
        top[1].reshape(*batch_tois.shape)
        top[2].reshape(*batch_label.shape)
        top[3].reshape(*batch_diff.shape)
        top[4].reshape(*batch_mask.shape)
        top[5].reshape(*batch_toi2.shape)

        top[0].data[...] = batch_clip.astype(np.float32, copy=False)
        top[1].data[...] = batch_tois.astype(np.float32, copy=False)
        top[2].data[...] = batch_label.astype(np.float32, copy=False)
        top[3].data[...] = batch_diff.astype(np.float32, copy=False)
        top[4].data[...] = batch_mask.astype(np.float32, copy=False)
        top[5].data[...] = batch_toi2.astype(np.float32, copy=False)
Example #9
0
    def _data_generator(self, batch_size):
        """Yield endless batches of region crops with labels and box deltas.

        For each of ``batch_size`` annotations the image is loaded, resized to
        ``im_size``, converted to RGB floats in [0, 1] and passed to
        ``selective_search``.  Each kept proposal, and each ground-truth box,
        is cropped and resized to ``im_size``.  A proposal is labelled with the
        class of its best-overlapping ground-truth box when the overlap
        exceeds ``threshold``, otherwise 0.

        ``im_size`` and ``threshold`` are not defined in this method; they are
        expected to come from the enclosing scope.

        Args:
            batch_size: number of annotations consumed per yielded batch.

        Yields:
            tuple: ``(images, labels, deltas)`` concatenated over the batch,
            with row ``k`` of each array describing the same region.
        """
        i = 0
        n = self.sample_nums
        while True:
            total_img_data = []
            total_labels = []
            total_deltas = []
            for _ in range(batch_size):
                # Reshuffle at the start of every pass over the annotations.
                if i == 0:
                    self._random_shuffle()
                annotation = self._annotations[i]
                image_path, gt_boxes = self._parse_annotation(annotation)
                img = cv2.imread(image_path)
                # height / width / channel
                height, width, _ = img.shape
                img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)

                # BGR -> RGB, then scale to [0, 1]
                img = img[:, :, (2, 1, 0)]
                img = img.astype(np.float32)
                img = img / 255
                # Rescale the ground-truth boxes to the resized image.
                gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
                gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)

                # Each region 'rect' is treated as x1, y1, x2, y2.
                # NOTE(review): some selective-search packages return
                # (x, y, w, h) instead -- confirm against the implementation.
                _, regions = selective_search(img, scale=200, sigma=0.9, min_size=50)

                proposals = np.asarray([list(region['rect']) for region in regions])
                selected_imgs = []
                # kept_rects records the proposals in the same order as their
                # crops in selected_imgs.  Rebuilding the boxes by iterating
                # the set would use an arbitrary order and pair labels/deltas
                # with the wrong crops.
                kept_rects = []
                seen = set()
                for r in proposals:
                    x1, y1, x2, y2 = r
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(round(x2)), int(round(y2))
                    key = (x1, y1, x2, y2)
                    # Skip duplicates and boxes smaller than 220 pixels.
                    if key in seen:
                        continue
                    if (x2 - x1) * (y2 - y1) < 220:
                        continue
                    crop_img = img[y1:y2, x1:x2, :]
                    # Resize the crop to the network input size.
                    crop_img = cv2.resize(crop_img, im_size, interpolation=cv2.INTER_CUBIC)
                    selected_imgs.append(crop_img)
                    kept_rects.append(list(key))
                    seen.add(key)
                # reshape keeps the array two-dimensional when no proposal
                # survived, so the vstack below still works.
                rects = np.asarray(kept_rects).reshape(-1, 4)

                # Add the ground-truth boxes as extra regions.
                for idx in range(len(gt_boxes)):
                    x1, y1, x2, y2 = gt_boxes[idx, 0:4]
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(round(x2)), int(round(y2))
                    crop_img = img[y1:y2, x1:x2, :]
                    crop_img = cv2.resize(crop_img, im_size, interpolation=cv2.INTER_CUBIC)
                    selected_imgs.append(crop_img)

                rects = np.vstack((rects, gt_boxes[:, 0:4]))
                # IoU between every region and every ground-truth box.
                overlaps = bbox_overlaps(rects, gt_boxes)
                # Index of the ground-truth box with the highest IoU.
                argmax_overlaps = np.argmax(overlaps, axis=1)
                # Class label: best-matching class above the threshold, else 0.
                max_overlaps = np.max(overlaps, axis=1)
                keep = np.where(max_overlaps > threshold)[0]
                labels = np.empty(len(argmax_overlaps))
                labels.fill(0)
                labels[keep] = gt_boxes[argmax_overlaps[keep], 4]
                # Regression targets towards the best-matching box.
                deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])

                total_deltas.append(deltas)
                total_labels.append(labels)
                total_img_data.append(selected_imgs)
                i = (i + 1) % n
            total_img_data = np.concatenate(total_img_data, axis=0)
            total_labels = np.concatenate(total_labels, axis=0)
            total_deltas = np.concatenate(total_deltas, axis=0)
            yield total_img_data, total_labels, total_deltas

#
# voc_data = VocData('~/segment_data', 2007, 'train', './data/voc_classes.txt')
# g = voc_data.data_generator_wrapper()
# x, y, z = next(g)
# print(x.shape)
# print(y.shape)
# print(z.shape)
Example #10
0
 def _transform_regions(self, regions, gt_boxes):
     """Return the ``bbox_transform`` targets from ``regions`` to ``gt_boxes``."""
     return bbox_transform(regions, gt_boxes)
Example #11
0
 def _compute_targets(self, ex_rois, gt_rois):
     """Return float32 regression targets from ``ex_rois`` to ``gt_rois``.

     ``ex_rois`` must be 4 columns wide and ``gt_rois`` 5 columns wide with
     the same row count; the fifth ``gt_rois`` column is not passed to
     ``bbox_transform``.
     """
     rows_match = ex_rois.shape[0] == gt_rois.shape[0]
     assert rows_match
     assert ex_rois.shape[1] == 4
     assert gt_rois.shape[1] == 5
     gt_coords = gt_rois[:, :4]
     deltas = bbox_transform(ex_rois, gt_coords)
     return deltas.astype(np.float32, copy=False)
Example #12
0
    def _data_generator(self, batch_size, is_svm):
        """Yield endless batches of region crops with labels and box deltas.

        For each of ``batch_size`` annotations the image is loaded, resized to
        ``im_size``, converted to RGB floats in [0, 1] and passed to
        ``selective_search``.  Each kept proposal, and each ground-truth box
        that can be cropped, is resized to ``im_size``.

        ``im_size`` is not defined in this method; it is expected to come from
        the enclosing scope.

        Args:
            batch_size: number of annotations consumed per yielded batch.
            is_svm: selects the labelling rule.  When true, labels start at -1,
                regions with overlap above ``cfg.THRESHOLD`` become 0 and
                regions with overlap above 0.7 take the class of their best
                ground-truth box.  Otherwise labels start at 0 and regions
                with overlap of at least ``cfg.FINE_TUNE_THRESHOLD`` take that
                class.

        Yields:
            tuple: ``(images, labels, deltas)`` concatenated over the batch,
            with row ``k`` of each array describing the same region.
        """
        i = 0
        n = self.samples_num
        while True:
            total_img_data = []
            total_labels = []
            total_deltas = []
            for _ in range(batch_size):
                # Reshuffle at the start of every pass over the annotations.
                if i == 0:
                    self._random_shuffle()
                annotation = self._annotations[i]
                image_path, gt_boxes = self._parse_annotation(annotation)
                img = cv2.imread(image_path)
                # height / width / channel
                height, width, _ = img.shape
                img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)

                # BGR -> RGB, then scale to [0, 1]
                img = img[:, :, (2, 1, 0)]
                img = img.astype(np.float32)
                img = img / 255.
                # Rescale the ground-truth boxes to the resized image.
                gt_boxes[:,
                         [0, 2]] = gt_boxes[:, [0, 2]] * (im_size[0] / width)
                gt_boxes[:,
                         [1, 3]] = gt_boxes[:, [1, 3]] * (im_size[1] / height)

                # Each region 'rect' is treated as x1, y1, x2, y2.
                # NOTE(review): some selective-search packages return
                # (x, y, w, h) instead -- confirm against the implementation.
                _, regions = selective_search(img,
                                              scale=200,
                                              sigma=0.9,
                                              min_size=50)

                proposals = np.asarray(
                    [list(region['rect']) for region in regions])
                selected_imgs = []
                # rects records the regions in the same order as their crops
                # in selected_imgs.  Rebuilding the boxes by iterating the set
                # would use an arbitrary order and pair labels/deltas with the
                # wrong crops.
                rects = []
                seen = set()
                for r in proposals:
                    x1, y1, x2, y2 = r
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(
                        round(x2)), int(round(y2))
                    key = (x1, y1, x2, y2)
                    # Skip duplicates and boxes smaller than 220 pixels.
                    if key in seen:
                        continue
                    if (x2 - x1) * (y2 - y1) < 220:
                        continue
                    crop_img = img[y1:y2, x1:x2, :]
                    # Resize the crop to the network input size.
                    crop_img = cv2.resize(crop_img,
                                          im_size,
                                          interpolation=cv2.INTER_CUBIC)
                    selected_imgs.append(crop_img)
                    rects.append(list(key))
                    seen.add(key)

                # Add the ground-truth boxes as extra regions.
                for idx in range(len(gt_boxes)):
                    x1, y1, x2, y2 = gt_boxes[idx, 0:4]
                    x1, y1, x2, y2 = int(round(x1)), int(round(y1)), int(
                        round(x2)), int(round(y2))
                    crop_img = img[y1:y2, x1:x2, :]
                    try:
                        crop_img = cv2.resize(crop_img,
                                              im_size,
                                              interpolation=cv2.INTER_CUBIC)
                    except cv2.error:
                        # Degenerate (empty) crop: leave this box out.
                        continue
                    selected_imgs.append(crop_img)
                    rects.append(gt_boxes[idx, 0:4])

                # reshape keeps the array two-dimensional when nothing was kept.
                rects = np.asarray(rects).reshape(-1, 4)
                # IoU between every region and every ground-truth box.
                overlaps = bbox_overlaps(rects, gt_boxes)
                # Index of the ground-truth box with the highest IoU.
                argmax_overlaps = np.argmax(overlaps, axis=1)
                max_overlaps = np.max(overlaps, axis=1)
                # SVM training and fine-tuning use different IoU thresholds.
                threshold = cfg.THRESHOLD if is_svm else cfg.FINE_TUNE_THRESHOLD
                labels = np.empty(len(argmax_overlaps))
                if is_svm:
                    # SVMs work well with small training sets, so the IoU
                    # ranges are restricted to shrink the sample set.
                    # Everything starts as -1.
                    labels.fill(-1)
                    # NOTE(review): regions with IoU *above* the threshold are
                    # marked background here; the usual R-CNN rule marks IoU
                    # *below* the threshold as negatives -- confirm the
                    # intended comparison.
                    bg_ids = np.where(max_overlaps > threshold)[0]
                    labels[bg_ids] = 0
                    # Regions with IoU above 0.7 are positives and take the
                    # class of their best-matching ground-truth box.
                    fg_ids = np.where(max_overlaps > 0.7)[0]
                    labels[fg_ids] = gt_boxes[argmax_overlaps[fg_ids], 4]
                else:
                    labels.fill(0)
                    # Regions at or above the threshold take the foreground
                    # class of their best-matching ground-truth box.
                    keep = np.where(max_overlaps >= threshold)[0]
                    labels[keep] = gt_boxes[argmax_overlaps[keep], 4]
                # Regression targets towards the best-matching box.
                deltas = bbox_transform(rects, gt_boxes[argmax_overlaps, 0:4])
                total_deltas.append(deltas)
                total_labels.append(labels)
                total_img_data.append(selected_imgs)
                i = (i + 1) % n
            total_img_data = np.concatenate(total_img_data, axis=0)
            total_labels = np.concatenate(total_labels, axis=0)
            total_deltas = np.concatenate(total_deltas, axis=0)
            yield total_img_data, total_labels, total_deltas