Code Example #1
    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape

        # random brightness and contrast
        img = random_distort(img)

        # rotate the image
        # random_rotate returns a tuple of the rotated image and a param dict;
        # param['k'] (int) is the number of 90-degree rotations applied.
        img, params = transforms.random_rotate(img, return_param=True)
        # record the new height and width
        _, t_H, t_W = img.shape
        # rotate the bounding boxes using the same parameter
        bbox = rotate_bbox(bbox, (H, W), params['k'])
        # prepare() rescales the image to the input size expected by Faster R-CNN
        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        # resize the bounding boxes to match the resized image
        bbox = transforms.resize_bbox(bbox, (t_H, t_W), (o_H, o_W))

        # horizontal & vertical flip
        # randomly flip the image horizontally and/or vertically
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             y_random=True,
                                             return_param=True)
        # flip the bounding box with respect to the parameter
        bbox = transforms.flip_bbox(bbox, (o_H, o_W),
                                    x_flip=params['x_flip'],
                                    y_flip=params['y_flip'])

        scale = o_H / t_H

        return img, bbox, label, scale
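
The rotate_bbox helper called above is user-defined and is not part of chainercv.transforms. A minimal sketch of what such a helper might look like, assuming boxes are (y_min, x_min, y_max, x_max) and k counts counter-clockwise quarter turns as in numpy.rot90:

    import numpy as np

    def rotate_bbox(bbox, size, k):
        # Sketch only, not the helper used in the snippets above.
        # bbox: (R, 4) array of (y_min, x_min, y_max, x_max).
        # size: (H, W) of the image *before* rotation.
        # k: number of counter-clockwise 90-degree rotations (numpy.rot90 convention).
        H, W = size
        out = bbox.copy()
        for _ in range(k % 4):
            y_min, x_min, y_max, x_max = out.T
            # After one quarter turn, the x axis becomes the new y axis
            # (measured from the right edge) and the y axis becomes the new x axis.
            out = np.stack([W - x_max, y_min, W - x_min, y_max], axis=1)
            H, W = W, H
        return out
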
Code Example #2
    def __call__(self, in_data):
        # Three of the five SSD-style data augmentation steps are applied here:
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        img, bbox, label = in_data

        # 3. Random cropping
        if self.random_crop and np.random.rand() > 0.5:
            next_img, param = random_crop_with_bbox_constraints(
                img,
                bbox,
                min_scale=min(self.crop_rate),
                max_scale=max(self.crop_rate),
                return_param=True)
            next_bbox, param = transforms.crop_bbox(bbox,
                                                    y_slice=param['y_slice'],
                                                    x_slice=param['x_slice'],
                                                    allow_outside_center=False,
                                                    return_param=True)
            if (len(label[param['index']]) != 0):
                label = label[param['index']]
                img, bbox = next_img, next_bbox

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = transforms.resize(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        if self.flip:
            img, params = transforms.random_flip(img,
                                                 x_random=True,
                                                 return_param=True)
            bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                        x_flip=params['x_flip'])

        img -= self.mean
        img /= self.std

        _, height, width = img.shape
        ymin = bbox[:, 0]
        xmin = bbox[:, 1]
        ymax = bbox[:, 2]
        xmax = bbox[:, 3]
        one_hot_label = np.eye(self.n_class)[label]
        xs = (xmin + (xmax - xmin) // 2) / width
        ws = (xmax - xmin) / width
        ys = (ymin + (ymax - ymin) // 2) / height
        hs = (ymax - ymin) / height
        t = [{
            'label': l,
            'x': x,
            'w': w,
            'y': y,
            'h': h,
            'one_hot_label': hot
        } for l, x, w, y, h, hot in zip(label, xs, ws, ys, hs, one_hot_label)]
        return img, t
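
The snippet above converts corner-format boxes into normalized center coordinates for a YOLO-style target (it uses floor division, `// 2`, for the half-extent; the sketch below uses true division for clarity). A tiny worked example with made-up numbers:

    # Hypothetical 300x300 image with one box from (y_min, x_min) = (30, 60)
    # to (y_max, x_max) = (90, 180); the values are only for illustration.
    height = width = 300
    ymin, xmin, ymax, xmax = 30.0, 60.0, 90.0, 180.0

    x_center = (xmin + (xmax - xmin) / 2) / width    # (60 + 60) / 300 = 0.4
    y_center = (ymin + (ymax - ymin) / 2) / height   # (30 + 30) / 300 = 0.2
    w = (xmax - xmin) / width                        # 120 / 300 = 0.4
    h = (ymax - ymin) / height                       # 60 / 300  = 0.2
    print(x_center, y_center, w, h)                  # 0.4 0.2 0.4 0.2
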
Code Example #3
    def __call__(self, in_data):
        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param["y_offset"], x_offset=param["x_offset"])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param["y_slice"], x_slice=param["x_slice"],
            allow_outside_center=False, return_param=True)
        label = label[param["index"]]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Transformation for SSD network input
        img -= self.mean
        mb_loc, mb_lab = self.coder.encode(bbox, label)

        return img, mb_loc, mb_lab
Code Example #4
    def predict(self, imgs, k=100, detail=False, output_index=-1):
        x = []
        sizes = []
        for img in imgs:
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))
        with chainer.using_config('train',
                                  False), chainer.function.no_backprop_mode():
            x = Variable(self.xp.stack(x))
            output = self.forward(x)[output_index]

        bboxes = []
        labels = []
        scores = []
        output['hm'] = F.sigmoid(output['hm'])
        output['hm'].to_cpu()
        for i in range(len(imgs)):
            bbox, label, score = self._decode_output(output, i, k)
            bbox = transforms.resize_bbox(bbox, (self.insize, self.insize),
                                          sizes[i])
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        if detail:
            return bboxes, labels, scores, output
        else:
            return bboxes, labels, scores
Code Example #5
    def predict(self, imgs):
        x = list()
        sizes = list()
        for img in imgs:
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))

        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            arm_locs, arm_confs, odm_locs, odm_confs = self(x)
        arm_locs, arm_confs = arm_locs.array, arm_confs.array
        odm_locs, odm_confs = odm_locs.array, odm_confs.array

        bboxes = list()
        labels = list()
        scores = list()
        for arm_loc, arm_conf, odm_loc, odm_conf, size in zip(
                arm_locs, arm_confs, odm_locs, odm_confs, sizes):
            bbox, label, score = self.coder.decode(arm_loc, arm_conf, odm_loc,
                                                   odm_conf, self.nms_thresh,
                                                   self.score_thresh)
            bbox = transforms.resize_bbox(bbox, (self.insize, self.insize),
                                          size)
            bboxes.append(chainer.cuda.to_cpu(bbox))
            labels.append(chainer.cuda.to_cpu(label))
            scores.append(chainer.cuda.to_cpu(score))

        return bboxes, labels, scores
Code Example #6
    def __call__(self, in_data):
        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param["y_offset"], x_offset=param["x_offset"])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param["y_slice"], x_slice=param["x_slice"],
            allow_outside_center=False, return_param=True)
        label = label[param["index"]]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Transformation for SSD network input
        img -= self.mean
        mb_loc, mb_lab = self.coder.encode(bbox, label)

        return img, mb_loc, mb_lab
Code Example #7
File: transform.py  Project: maeshu/jsk_apc
    def __call__(self, in_data):
        assert len(in_data) == 6
        img, bbox, label, mask, lbl_vis, lbl_occ = in_data

        # H, W, C -> C, H, W
        img = img.transpose(2, 0, 1)
        lbl_occ = lbl_occ.transpose(2, 0, 1)

        if not self.train:
            return img, bbox, label, mask, lbl_vis, lbl_occ

        imgs, sizes, scales = self.mask_rcnn.prepare([img])
        img = imgs[0]
        H, W = sizes[0]
        scale = scales[0]
        # _, o_H, o_W = img.shape

        o_H, o_W = int(round(scale * H)), int(round(scale * W))

        if len(bbox) > 0:
            bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        if len(mask) > 0:
            mask = transforms.resize(mask, size=(o_H, o_W), interpolation=0)
            mask = mask.transpose(1, 2, 0)
            mask = pad_multiple_of(mask, mode='constant', constant_values=-1)
            mask = mask.transpose(2, 0, 1)
            assert mask.shape[1:] == img.shape[1:]

        lbl_vis = transforms.resize(lbl_vis[None],
                                    size=(o_H, o_W),
                                    interpolation=0)[0]
        lbl_occ = transforms.resize(lbl_occ, size=(o_H, o_W), interpolation=0)
        lbl_vis = pad_multiple_of(lbl_vis, mode='constant', constant_values=-1)
        lbl_occ = lbl_occ.transpose(1, 2, 0)
        lbl_occ = pad_multiple_of(lbl_occ, mode='constant', constant_values=-1)
        lbl_occ = lbl_occ.transpose(2, 0, 1)
        assert lbl_vis.shape == img.shape[1:]
        assert lbl_occ.shape[1:] == img.shape[1:]

        # # horizontally flip
        # img, params = transforms.random_flip(
        #     img, x_random=True, return_param=True)
        # bbox = transforms.flip_bbox(
        #     bbox, (o_H, o_W), x_flip=params['x_flip'])
        # if mask.ndim == 2:
        #     mask = transforms.flip(
        #         mask[None, :, :], x_flip=params['x_flip'])[0]
        # else:
        #     mask = transforms.flip(mask, x_flip=params['x_flip'])
        # lbl_vis = transforms.flip(lbl_vis[None], x_flip=params['x_flip'])[0]
        # lbl_occ = transforms.flip(lbl_occ, x_flip=params['x_flip'])

        keep = (mask == 1).sum(axis=(1, 2)) > 0
        bbox = bbox[keep]
        label = label[keep]
        mask = mask[keep]

        return img, bbox, label, mask, scale, lbl_vis, lbl_occ
Code Example #8
    def __call__(self, in_data):
        # There are six data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping
        # 6. Random vertical flipping

        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(img,
                                                  fill=self.mean,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       return_param=True)
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    x_flip=params['x_flip'])

        # 6. Random vertical flipping
        img, params = transforms.random_flip(img,
                                             y_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    y_flip=params['y_flip'])

        # Preparation for SSD network
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
Code Example #9
File: ssd.py  Project: zutshianand/chainercv
    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        x = []
        sizes = []
        for img in imgs:
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))

        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            mb_locs, mb_confs = self.forward(x)
        mb_locs, mb_confs = mb_locs.array, mb_confs.array

        bboxes = []
        labels = []
        scores = []
        for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
            bbox, label, score = self.coder.decode(mb_loc, mb_conf,
                                                   self.nms_thresh,
                                                   self.score_thresh)
            bbox = transforms.resize_bbox(bbox, (self.insize, self.insize),
                                          size)
            bboxes.append(chainer.backends.cuda.to_cpu(bbox))
            labels.append(chainer.backends.cuda.to_cpu(label))
            scores.append(chainer.backends.cuda.to_cpu(score))

        return bboxes, labels, scores
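
For reference, a typical way to drive a predict method like this one through ChainerCV's bundled SSD300 link might look like the sketch below; 'sample.jpg' is just a placeholder path.

    from chainercv.links import SSD300
    from chainercv.utils import read_image

    # SSD300 weights pre-trained on PASCAL VOC 2007+2012.
    model = SSD300(pretrained_model='voc0712')

    # read_image returns a CHW, RGB, float32 array in [0, 255],
    # which matches what predict() expects.
    img = read_image('sample.jpg')

    # predict() takes an iterable of images and returns per-image lists.
    bboxes, labels, scores = model.predict([img])
    bbox, label, score = bboxes[0], labels[0], scores[0]
    print(bbox.shape, label.shape, score.shape)  # (R, 4), (R,), (R,)
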
Code Example #10
    def __call__(self, in_data):
        if len(in_data) == 6:
            img, bbox, label, mask, crowd, area = in_data
        elif len(in_data) == 4:
            img, bbox, label, mask = in_data
        else:
            raise ValueError

        img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

        if not self.train:
            if len(in_data) == 6:
                return img, bbox, label, mask, crowd, area
            elif len(in_data) == 4:
                return img, bbox, label, mask
            else:
                raise ValueError

        imgs, sizes, scales = self.mask_rcnn.prepare([img])
        # print(type(imgs))
        # print(type(sizes))
        # print(type(scales))

        img = imgs[0]
        H, W = sizes[0]
        scale = scales[0]
        _, o_H, o_W = img.shape

        if len(bbox) > 0:
            bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        if len(mask) > 0:
            mask = transforms.resize(
                mask, size=(o_H, o_W), interpolation=0)

        # # horizontally flip
        # img, params = transforms.random_flip(
        #     img, x_random=True, return_param=True)
        # bbox = transforms.flip_bbox(
        #     bbox, (o_H, o_W), x_flip=params['x_flip'])
        # if mask.ndim == 2:
        #     mask = transforms.flip(
        #         mask[None, :, :], x_flip=params['x_flip'])[0]
        # else:
        #     mask = transforms.flip(mask, x_flip=params['x_flip'])

        # horizontally and vertically flip
        img, params = transforms.random_flip(
            img, y_random=True, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (o_H, o_W), y_flip=params['y_flip'], x_flip=params['x_flip'])
        if mask.ndim == 2:
            mask = transforms.flip(
                mask[None, :, :], y_flip=params['y_flip'], x_flip=params['x_flip'])[0]
        else:
            mask = transforms.flip(mask, y_flip=params['y_flip'], x_flip=params['x_flip'])

        return img, bbox, label, mask, scale, sizes
Code Example #11
    def __call__(self, in_data):
        # Data augmentation is performed in five steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        img, bbox, label = in_data

        # 1. Color augmentation
        # Augment the data by combining changes in brightness, contrast,
        # saturation and hue
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            # Place the input image at a random position on a larger canvas to
            # generate images with various ratios, then update the bounding boxes
            img, param = transforms.random_expand(img,
                                                  fill=self.mean,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       return_param=True)
        # Adjust the bounding boxes so that they fit inside the cropped image
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolation
        ## Resize the image and the bounding boxes
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        ## Randomly flip the image and bounding boxes horizontally
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    x_flip=params['x_flip'])

        # Preparation for feeding the data into the SSD network
        img -= self.mean
        ## Compute mb_loc (offsets and scales of the default bounding boxes) and
        ## mb_label (an array of class indices) to feed into the SSD
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
Code Example #12
    def test_resize_bbox(self):
        bbox = np.random.uniform(low=0., high=32., size=(10, 5))

        out = resize_bbox(bbox, input_shape=(32, 32), output_shape=(64, 128))
        bbox_expected = bbox.copy()
        bbox_expected[:, 0] = bbox[:, 0] * 4
        bbox_expected[:, 1] = bbox[:, 1] * 2
        bbox_expected[:, 2] = bbox[:, 2] * 4
        bbox_expected[:, 3] = bbox[:, 3] * 2
        np.testing.assert_equal(out, bbox_expected)
Code Example #13
File: ssd.py  Project: bkartel1/chainercv
    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :obj:`(y_min, x_min, y_max, x_max)` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        x = list()
        sizes = list()
        for img in imgs:
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))

        with chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            loc, conf = self(x)
        raw_bboxes, raw_scores = self._decode(loc.data, conf.data)

        bboxes = list()
        labels = list()
        scores = list()
        for raw_bbox, raw_score, size in zip(raw_bboxes, raw_scores, sizes):
            raw_bbox = transforms.resize_bbox(raw_bbox, (1, 1), size)
            bbox, label, score = self._suppress(raw_bbox, raw_score)
            bboxes.append(chainer.cuda.to_cpu(bbox))
            labels.append(chainer.cuda.to_cpu(label))
            scores.append(chainer.cuda.to_cpu(score))

        return bboxes, labels, scores
Code Example #14
File: train.py  Project: jacke121/chainer-maskrcnn
    def __call__(self, in_data):
        img, bbox, label, label_img = in_data
        _, H, W = img.shape
        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        for i, im in enumerate(label_img):
            label_img[i] = cv2.resize(im, (o_W, o_H),
                                      interpolation=cv2.INTER_NEAREST)

        return img, bbox, label, label_img, scale
Code Example #15
    def test_resize_bbox(self):
        in_size = (32, 24)
        out_size = (in_size[0] * 2, in_size[1] * 4)
        bbox = generate_random_bbox(10, in_size, 0, min(in_size))

        out = resize_bbox(bbox, in_size=in_size, out_size=out_size)
        bbox_expected = bbox.copy()
        bbox_expected[:, 0] = bbox[:, 0] * 2
        bbox_expected[:, 1] = bbox[:, 1] * 4
        bbox_expected[:, 2] = bbox[:, 2] * 2
        bbox_expected[:, 3] = bbox[:, 3] * 4
        np.testing.assert_equal(out, bbox_expected)
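
This test pins down what resize_bbox computes under the current ChainerCV convention (boxes as (y_min, x_min, y_max, x_max), sizes as (H, W)): y coordinates scale by out_H / in_H and x coordinates by out_W / in_W. A behavioral sketch, not the library implementation:

    import numpy as np

    def resize_bbox_sketch(bbox, in_size, out_size):
        # Scale (y_min, x_min, y_max, x_max) boxes from in_size=(H, W) to out_size.
        y_scale = out_size[0] / in_size[0]
        x_scale = out_size[1] / in_size[1]
        out = bbox.astype(np.float64)
        out[:, 0::2] *= y_scale  # y_min, y_max
        out[:, 1::2] *= x_scale  # x_min, x_max
        return out

    # With in_size=(32, 24) and out_size=(64, 96), y coordinates double and
    # x coordinates quadruple, matching the expectations asserted above.
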
Code Example #16
File: data_set.py  Project: cxrasdfg/my_faster_rcnn
    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = preprocess(img, self.min_size, self.max_size)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = random_flip(img, x_random=True, return_param=True)
        bbox = flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, [scale, scale]
Code Example #17
    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = transforms.random_flip(img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params["x_flip"])

        return img, bbox, label, scale
Code Example #18
    def demo(self, imgs, detection=True, segmentation=True):
        # Segmentation can only run when the model was built with a
        # segmentation head; otherwise force it off. The detection flag is
        # used as passed in.
        if not self.segmentation:
            segmentation = self.segmentation
        x = []
        sizes = []
        for img in imgs:
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))
        with chainer.using_config('train', False), \
             chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            result_detection, result_segmentation = self(x)

        bboxes = []
        labels = []
        scores = []
        masks = []
        if detection:
            mb_locs, mb_confs = result_detection
            # TODO: for detection
            mb_locs, mb_confs = mb_locs.array, mb_confs.array

            for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
                bbox, label, score = self.coder.decode(mb_loc, mb_conf,
                                                       self.nms_thresh,
                                                       self.score_thresh)
                bbox = transforms.resize_bbox(bbox, (self.insize, self.insize),
                                              size)
                bboxes.append(chainer.backends.cuda.to_cpu(bbox))
                labels.append(chainer.backends.cuda.to_cpu(label))
                scores.append(chainer.backends.cuda.to_cpu(score))

        if segmentation:
            # TODO: for segmentation
            mask = F.argmax(result_segmentation, axis=1)
            num, _, _ = mask.shape
            mask = mask.array

            for i, size in enumerate(sizes):
                mask_ = mask[i, :, :]
                mask_ = mask_resize_with_nearest(mask_, size)
                masks.append(chainer.backends.cuda.to_cpu(mask_))

        return bboxes, labels, scores, masks
Code Example #19
    def transform(in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, scale
Code Example #20
    def __call__(self, in_data):
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        img, bbox, label = in_data

        bbox = np.array(bbox).astype(np.float32)

        if len(bbox) == 0:
            warnings.warn("No bounding box detected", RuntimeWarning)
            img = resize_with_random_interpolation(img, (self.size, self.size))
            mb_loc, mb_label = self.coder.encode(bbox, label)
            return img, mb_loc, mb_label

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (self.size, self.size), x_flip=params['x_flip'])

        mb_loc, mb_label = self.coder.encode(bbox, label)
        return img, mb_loc, mb_label
Code Example #21
File: train.py  Project: gwtnb/chainercv
    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape
        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (o_H, o_W), x_flip=params['x_flip'])

        return img, bbox, label, scale
Code Example #22
    def __call__(self, in_data):
        img, bbox, keypoints = in_data
        _, H, W = img.shape
        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H

        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        label = np.zeros(bbox.shape[0], dtype=np.int32)
        # keypoints has shape (N, 17, 3): N is the number of bboxes, 17 is the
        # number of keypoints per instance, and each keypoint is (x, y, v),
        # where v=0: unlabeled, v=1: labeled but invisible, v=2: labeled and visible
        keypoints = keypoints.astype(np.float32)
        kp = keypoints[:, :, [1, 0]]
        kp = np.concatenate([kp * scale, keypoints[:, :, 2, None]], axis=2)

        return img, bbox, label, kp, scale
Code Example #23
    def __call__(self, in_data):
        img, mask, label = in_data
        bbox = mask_to_bbox(mask)
        _, orig_H, orig_W = img.shape
        img = self.fcis.prepare(img)
        _, H, W = img.shape
        scale = H / orig_H
        mask = transforms.resize(mask.astype(np.float32), (H, W))
        bbox = transforms.resize_bbox(bbox, (orig_H, orig_W), (H, W))

        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        mask = transforms.flip(mask, x_flip=params['x_flip'])
        bbox = transforms.flip_bbox(bbox, (H, W), x_flip=params['x_flip'])
        return img, mask, label, bbox, scale
Code Example #24
    def get_example(self, i):
        """Returns the i-th example.

        Args:
            i (int): The index of the example.

        Returns:
            tuple of an image, its bounding boxes, and their labels.
            The image is in CHW format and its color channels are ordered in
            RGB.
        """
        #print("The image file name is %s"%self.images[i][0:-4])
        img = utils.read_image(os.path.join(self.data_dir, 'images',
                                            self.images[i]),
                               color=True)
        # Add processing to the other two channels
        with warnings.catch_warnings():
            # print("read in by expanding")
            warnings.simplefilter("ignore")
            img[1, :, :] = exposure.rescale_intensity(
                exposure.equalize_adapthist(
                    exposure.rescale_intensity(img[1, :, :])),
                out_range=(0, 255))
            img[2, :, :] = exposure.rescale_intensity(filters.gaussian(
                exposure.rescale_intensity(img[2, :, :])),
                                                      out_range=(0, 255))

        # bbs is an (m, 4) matrix, where m is the number of bounding
        # boxes in the image.
        # label is an (m,) integer array with one class per bounding box.

        bbs_file = os.path.join(self.data_dir, 'bounding_boxes',
                                self.images[i][0:-4] + '.txt')

        label_bbs = np.loadtxt(bbs_file, dtype=np.float32)
        label = label_bbs[:, 0].astype(np.int32)
        bbs = label_bbs[:, 1:5]

        _, H, W = img.shape
        if self.resize and (H != self.img_size or W != self.img_size):
            img = transforms.resize(img, (self.img_size, self.img_size))
            bbs = transforms.resize_bbox(bbs, (H, W),
                                         (self.img_size, self.img_size))

        return img, bbs, label
Code Example #25
    def transform(in_data):
        img, bbox = in_data
        img -= np.array([103.939, 116.779, 123.68])[:, None, None]

        # Resize the image and bounding boxes so that the smaller edge is
        # scaled to 600, without letting the larger edge exceed 1200
        input_shape = img.shape[1:]
        output_shape = _shape_soft_min_hard_max(input_shape, 600, 1200)
        img = transforms.resize(img, output_shape)
        bbox = transforms.resize_bbox(bbox, input_shape, output_shape)

        # horizontally flip
        img, flips = transforms.random_flip(img,
                                            horizontal_flip=True,
                                            return_flip=True)
        h_flip = flips['h']
        bbox = transforms.flip_bbox(bbox, output_shape, h_flip)
        return img, bbox
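
The helper _shape_soft_min_hard_max is not shown in this snippet. A plausible sketch, assuming it implements the usual Faster R-CNN resizing rule (rescale so the shorter edge reaches the soft minimum, unless that would push the longer edge past the hard maximum):

    def _shape_soft_min_hard_max(shape, soft_min, hard_max):
        # Sketch of a helper whose real implementation is not shown above;
        # the behavior described here is an assumption.
        # shape: (H, W) of the input image.
        scale = soft_min / min(shape)
        if max(shape) * scale > hard_max:
            scale = hard_max / max(shape)
        return tuple(int(round(s * scale)) for s in shape)

    # e.g. (375, 500) -> (600, 800); (400, 1000) -> (480, 1200)
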
Code Example #26
File: train.py  Project: gwtnb/chainercv
    def __call__(self, in_data):
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (self.size, self.size), x_flip=params['x_flip'])

        # Preparation for SSD network
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
Code Example #27
    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape

        # random brightness and contrast
        img = random_distort(img)

        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

        # horizontal & vertical flip
        img, params = transforms.random_flip(
            img, x_random=True, y_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (o_H, o_W), x_flip=params['x_flip'], y_flip=params['y_flip'])

        scale = o_H / H

        return img, bbox, label, scale
Code Example #28
    def __call__(self, in_data):
        if len(in_data) == 6:
            img, bbox, label, mask, crowd, area = in_data
        elif len(in_data) == 4:
            img, bbox, label, mask = in_data
        else:
            raise ValueError

        img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

        if not self.train:
            if len(in_data) == 6:
                return img, bbox, label, mask, crowd, area
            elif len(in_data) == 4:
                return img, bbox, label, mask
            else:
                raise ValueError

        _, H, W = img.shape
        img = self.mask_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        if len(bbox) > 0:
            bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        if len(mask) > 0:
            mask = transforms.resize(
                mask, size=(o_H, o_W), interpolation=0)

        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (o_H, o_W), x_flip=params['x_flip'])
        if mask.ndim == 2:
            mask = transforms.flip(
                mask[None, :, :], x_flip=params['x_flip'])[0]
        else:
            mask = transforms.flip(mask, x_flip=params['x_flip'])

        return img, bbox, label, mask, scale
Code Example #29
    def get_example(self, i):
        try:
            image, label = super().get_example(i)
        except Exception as e:
            print(e)
            image, label = super().get_example(0)

        if len(label.shape) > 0 and len(label) % 4 == 0:
            num_bboxes = len(label) // 4
            label = numpy.reshape(label, (num_bboxes, -1))

        if image.shape[0] == 1:
            image = numpy.tile(image, (3, 1, 1))

        if self.augmentations is not None:
            image = numpy.transpose(image, (1, 2, 0))
            image = image.astype(numpy.uint8)
            image = self.augmentations.augment_images([image])[0]
            image = image.astype(numpy.float32)
            image = numpy.transpose(image, (2, 0, 1))

        if self.image_size is not None:
            image_size = image.shape[-2:]
            if len(label.shape) > 1:
                # we are likely dealing with bboxes
                self.check_for_bad_label(label, image_size)
                label = transforms.resize_bbox(label.astype(numpy.float32),
                                               image_size, self.image_size)
            image = resize_image(image,
                                 self.image_size,
                                 image_mode=self.image_mode)
            label = label.astype(self._label_dtype)

        if len(image.shape) == 2:
            image = image[None, ...]

        if self.return_dummy_scores:
            return image / 255, label, numpy.zeros((1, ))
        return image / 255, label
Code Example #30
    def __call__(self, in_data):
        img, bbox, label = in_data

        img = random_distort(img)

        if np.random.randint(2):
            img, param = transforms.random_expand(img,
                                                  fill=self.mean,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       return_param=True)
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        label = label[param['index']]

        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    x_flip=params['x_flip'])

        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
Code Example #31
    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        x = []
        params = []
        for img in imgs:
            _, H, W = img.shape
            img, param = transforms.resize_contain(img / 255,
                                                   (self.insize, self.insize),
                                                   fill=0.5,
                                                   return_param=True)
            x.append(self.xp.array(img))
            param['size'] = (H, W)
            params.append(param)

        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            locs, objs, confs = self(self.xp.stack(x))
        locs = locs.array
        objs = objs.array
        confs = confs.array

        bboxes = []
        labels = []
        scores = []
        for loc, obj, conf, param in zip(locs, objs, confs, params):
            bbox, label, score = self._decode(loc, obj, conf)
            bbox = cuda.to_cpu(bbox)
            label = cuda.to_cpu(label)
            score = cuda.to_cpu(score)

            bbox = transforms.translate_bbox(bbox, -self.insize / 2,
                                             -self.insize / 2)
            bbox = transforms.resize_bbox(bbox, param['scaled_size'],
                                          param['size'])
            bbox = transforms.translate_bbox(bbox, param['size'][0] / 2,
                                             param['size'][1] / 2)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores
Code Example #32
    def __call__(self, in_data):
        img, bbox, label = in_data
        _, H, W = img.shape

        # random brightness and contrast
        img = random_distort(img)

        # rotate the image
        # random_rotate returns a tuple of the rotated image and a param dict;
        # param['k'] (int) is the number of 90-degree rotations applied.
        img, params = transforms.random_rotate(img, return_param=True)
        # record the new height and width
        _, t_H, t_W = img.shape
        # rotate the bounding boxes using the same parameter
        bbox = rotate_bbox(bbox, (H, W), params['k'])

        # Random expansion: randomly place the input image on a larger canvas.
        # The canvas size is (rH, rW), where r is a random ratio drawn from
        # [1, max_ratio], and the canvas is filled with the fill value except
        # for the region where the original image is placed.
        if np.random.randint(2):
            fill_value = img.mean(axis=1).mean(axis=1).reshape(-1, 1, 1)
            img, param = transforms.random_expand(img,
                                                  max_ratio=2,
                                                  fill=fill_value,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        # Random crop
        # crops the image with bounding box constraints
        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       min_scale=0.5,
                                                       max_aspect_ratio=1.5,
                                                       return_param=True)
        # Truncate bounding boxes to the cropped region; boxes whose centers
        # fall outside the cropped area are removed.
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        # Keep only the labels of the bounding boxes that survived the crop
        label = label[param['index']]
        # if all bounding boxes were removed, fall back to the original data
        if bbox.shape[0] == 0:
            img, bbox, label = in_data
        # update the height and width of the image
        _, t_H, t_W = img.shape

        # prepare() rescales the image to the input size expected by Faster R-CNN
        img = self.faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        # resize the bounding boxes to match the resized image
        bbox = transforms.resize_bbox(bbox, (t_H, t_W), (o_H, o_W))

        # horizontal & vertical flip
        # randomly flip the image horizontally and/or vertically
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             y_random=True,
                                             return_param=True)
        # flip the bounding box with respect to the parameter
        bbox = transforms.flip_bbox(bbox, (o_H, o_W),
                                    x_flip=params['x_flip'],
                                    y_flip=params['y_flip'])

        scale = o_H / t_H

        return img, bbox, label, scale
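
The comments above describe how transforms.random_expand places the image on a larger canvas and reports where it was placed; translating the boxes by the same offsets keeps them aligned with the image content. A small sketch with dummy data:

    import numpy as np
    from chainercv import transforms

    # A dummy 3 x 100 x 200 image and one box; the values are only for illustration.
    img = np.zeros((3, 100, 200), dtype=np.float32)
    bbox = np.array([[10., 20., 60., 120.]], dtype=np.float32)  # (y_min, x_min, y_max, x_max)

    img, param = transforms.random_expand(img, max_ratio=2, fill=0, return_param=True)
    # The canvas is ratio*100 x ratio*200 and the original image now starts at
    # (param['y_offset'], param['x_offset']), so the box is shifted by the same amounts.
    bbox = transforms.translate_bbox(
        bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
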
Code Example #33
    def __call__(self, in_data):
        """in_data includes three datas.
        Args:
            img(array): Shape is (3, H, W). range is [0, 255].
            bbox(array): Shape is (N, 4). (y_min, x_min, y_max, x_max).
                         range is [0, max size of boxes].
            label(array): Classes of bounding boxes.

        Returns:
            img(array): Shape is (3, out_H, out_W). range is [0, 1].
                        interpolation value equals to self.value.
        """
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping
        if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
            self.i += 1
            i = self.i % len(self.dim)
            self.output_shape = (self.dim[i], self.dim[i])
        self.count += 1

        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img,
                             brightness_delta=32,
                             contrast_low=0.5,
                             contrast_high=1.5,
                             saturation_low=0.5,
                             saturation_high=1.5,
                             hue_delta=25)

        # Normalize. range is [0, 1]
        img /= 255.0

        _, H, W = img.shape
        scale = np.random.uniform(0.25, 2)
        random_expand = np.random.uniform(0.8, 1.2, 2)
        net_h, net_w = self.output_shape
        out_h = net_h * scale  # random_expand[0]
        out_w = net_w * scale  # random_expand[1]
        if H > W:
            out_w = out_h * (float(W) / H) * np.random.uniform(0.8, 1.2)
        elif H < W:
            out_h = out_w * (float(H) / W) * np.random.uniform(0.8, 1.2)

        out_h = int(out_h)
        out_w = int(out_w)

        img = resize_with_random_interpolation(img, (out_h, out_w))
        bbox = transforms.resize_bbox(bbox, (H, W), (out_h, out_w))

        if out_h < net_h and out_w < net_w:
            img, param = expand(img,
                                out_h=net_h,
                                out_w=net_w,
                                fill=self.value,
                                return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])
        else:
            out_h = net_h if net_h > out_h else int(out_h * 1.05)
            out_w = net_w if net_w > out_w else int(out_w * 1.05)
            img, param = expand(img,
                                out_h=out_h,
                                out_w=out_w,
                                fill=self.value,
                                return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

            img, param = crop_with_bbox_constraints(img,
                                                    bbox,
                                                    return_param=True,
                                                    crop_height=net_h,
                                                    crop_width=net_w)
            bbox, param = transforms.crop_bbox(bbox,
                                               y_slice=param['y_slice'],
                                               x_slice=param['x_slice'],
                                               allow_outside_center=False,
                                               return_param=True)
            label = label[param['index']]

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox,
                                    self.output_shape,
                                    x_flip=params['x_flip'])

        # Preparation for Yolov2 network. scale=[0, 1]
        bbox[:, ::2] /= self.output_shape[0]  # y
        bbox[:, 1::2] /= self.output_shape[1]  # x

        num_bbox = len(bbox)
        len_max = max(num_bbox, self.max_target)
        out_bbox = np.zeros((len_max, 4), dtype='f')
        out_bbox[:num_bbox] = bbox[:num_bbox]
        out_label = np.zeros((len_max), dtype='i')
        out_label[:num_bbox] = label
        out_bbox = out_bbox[:self.max_target]
        out_label = out_label[:self.max_target]
        num_array = min(num_bbox, self.max_target)

        gmap = create_map_anchor_gt(bbox, self.anchors, self.output_shape,
                                    self.downscale, self.n_boxes, len_max)
        gmap = gmap[:self.max_target]

        img = np.clip(img, 0, 1)
        return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')