コード例 #1
0
def preprocess(image, joint, camera, param):
    logger.debug("rotate")
    # random rotate
    image, joint = rotate_hand(image, joint, camera, angle=param["angle"])
    if param["add_noise"]:
        noise = np.random.normal(0, 1, joint.shape)
        joint = joint + noise
    # crop
    logger.debug("crop")
    image, camera, domain = crop_handwith2d(
        image,
        joint,
        camera,
        param["oscillation"],
    )

    logger.debug("scale")
    image, camera = scale(image, camera, param["crop2d"])

    logger.debug("flip")
    # random flip
    image, joint = flip_hand(
        image, joint, camera,
        y_flip=param["y_flip"],
        x_flip=param["x_flip"],
    )
    logger.debug("flip")
    # scale image so that target image contains a specific domain
    image, camera = resize_contain(image, camera, size=param["crop2d"], fill=0)
    if param["do_color_augmentation"]:
        logger.debug("distort image")
        image = random_distort(image)
    return image, joint, camera, domain
コード例 #2
0
    def __getitem__(self, _idx):
        _camera, _img, _skel_2d, _skel3d, _side = super(DataPose,
                                                        self).__getitem__(_idx)

        _bbox = self._getbbox(_skel_2d)
        _target = self._gettarget()

        _trans, _crop = self._gen_trans(_bbox, _target, _img)

        if random.random() <= 0.5:  # flip
            _skel_2d[:, 0] = _img.shape[1] - _skel_2d[:, 0]
            _skel3d[:, 0] = -_skel3d[:, 0]
            _side = 1 - _side

            _bbox = self._getbbox(_skel_2d)
            _trans, _ = self._gen_trans(_bbox, _target, None)
            _crop = _crop[:, ::-1, :]

        _uv2d = self._get_point2d(_skel_2d, _trans)
        _crop_img = _crop
        _crop_img = _crop_img.transpose((2, 0, 1))
        from chainercv.links.model.ssd import random_distort
        _crop_img = random_distort(_crop_img)

        if False:
            _crop_img_cop = _crop_img.transpose((1, 2, 0))
            self._vis(_crop_img_cop, _uv2d, _skel3d)

        return np.array(_crop_img, dtype=np.float32), \
            np.array(_uv2d / self.config.input_size, dtype=np.float32), \
            np.uint8(_side)
コード例 #3
0
    def __call__(self, in_data):
        img, bbox, label = in_data

        # 1. Color augumentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param["y_offset"], x_offset=param["x_offset"])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param["y_slice"], x_slice=param["x_slice"],
            allow_outside_center=False, return_param=True)
        label = label[param["index"]]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Transformation for SSD network input
        img -= self.mean
        mb_loc, mb_lab = self.coder.encode(bbox, label)

        return img, mb_loc, mb_lab
コード例 #4
0
    def __call__(self, in_data):
        img, bbox, label = in_data

        # 1. Color augumentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param["y_offset"], x_offset=param["x_offset"])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param["y_slice"], x_slice=param["x_slice"],
            allow_outside_center=False, return_param=True)
        label = label[param["index"]]

        # 4. Resizing with random interpolation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Transformation for SSD network input
        img -= self.mean
        mb_loc, mb_lab = self.coder.encode(bbox, label)

        return img, mb_loc, mb_lab
コード例 #5
0
    def test_random_distort(self):
        if not optional_modules:
            return
        img = np.random.randint(0, 256, size=(3, 48, 32)).astype(np.float32)

        out = random_distort(img)
        self.assertEqual(out.shape, img.shape)
        self.assertEqual(out.dtype, img.dtype)
コード例 #6
0
    def __call__(self, in_data):
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping
        # 6. Random vertical flipping

        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(img,
                                                  fill=self.mean,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       return_param=True)
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolatation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    x_flip=params['x_flip'])

        # 6. Random vertical flipping
        img, params = transforms.random_flip(img,
                                             y_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    y_flip=params['y_flip'])

        # Preparation for SSD network
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
コード例 #7
0
    def __call__(self, in_data):
        # 5段階のステップでデータの水増しを行う
        # 1. 色の拡張
        # 2. ランダムな拡大
        # 3. ランダムなトリミング
        # 4. ランダムな補完の再補正
        # 5. ランダムな水平反転

        img, bbox, label = in_data

        # 1. 色の拡張
        # 明るさ,コントラスト,彩度,色相を組み合わせ,データ拡張をする
        img = random_distort(img)

        # 2. ランダムな拡大
        if np.random.randint(2):
            # キャンバスの様々な座標に入力画像を置いて,様々な比率の画像を生成し,bounding boxを更新
            img, param = transforms.random_expand(img,
                                                  fill=self.mean,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        # 3. ランダムなトリミング
        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       return_param=True)
        # トリミングされた画像内にbounding boxが入るように調整
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        label = label[param['index']]

        # 4. ランダムな補完の再補正
        ## 画像とbounding boxのリサイズ
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. ランダムな水平反転
        ## 画像とbounding boxをランダムに水平方向に反転
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    x_flip=params['x_flip'])

        # SSDのネットワークに入力するための準備の処理
        img -= self.mean
        ## SSDに入力するためのloc(デフォルトbounding boxのオフセットとスケール)と
        ## mb_label(クラスを表す配列)を出力
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
コード例 #8
0
    def __call__(self, in_data):
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        img, bbox, label = in_data

        bbox = np.array(bbox).astype(np.float32)

        if len(bbox) == 0:
            warnings.warn("No bounding box detected", RuntimeWarning)
            img = resize_with_random_interpolation(img, (self.size, self.size))
            mb_loc, mb_label = self.coder.encode(bbox, label)
            return img, mb_loc, mb_label

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolatation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (self.size, self.size), x_flip=params['x_flip'])

        mb_loc, mb_label = self.coder.encode(bbox, label)
        return img, mb_loc, mb_label
コード例 #9
0
ファイル: train.py プロジェクト: gwtnb/chainercv
    def __call__(self, in_data):
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img)

        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolatation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (self.size, self.size), x_flip=params['x_flip'])

        # Preparation for SSD network
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
コード例 #10
0
    def __call__(self, in_data):
        img, bbox, label = in_data

        img = random_distort(img)

        if np.random.randint(2):
            img, param = transforms.random_expand(img,
                                                  fill=self.mean,
                                                  return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

        img, param = random_crop_with_bbox_constraints(img,
                                                       bbox,
                                                       return_param=True)
        bbox, param = transforms.crop_bbox(bbox,
                                           y_slice=param['y_slice'],
                                           x_slice=param['x_slice'],
                                           allow_outside_center=False,
                                           return_param=True)
        label = label[param['index']]

        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))
        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size),
                                    x_flip=params['x_flip'])

        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label
コード例 #11
0
    def __call__(self, in_data):
        """in_data includes three datas.
        Args:
            img(array): Shape is (3, H, W). range is [0, 255].
            bbox(array): Shape is (N, 4). (y_min, x_min, y_max, x_max).
                         range is [0, max size of boxes].
            label(array): Classes of bounding boxes.

        Returns:
            img(array): Shape is (3, out_H, out_W). range is [0, 1].
                        interpolation value equals to self.value.
        """
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping
        if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
            self.i += 1
            i = self.i % len(self.dim)
            self.output_shape = (self.dim[i], self.dim[i])
        self.count += 1

        img, bbox, label = in_data

        # 1. Color augmentation
        img = random_distort(img,
                             brightness_delta=32,
                             contrast_low=0.5,
                             contrast_high=1.5,
                             saturation_low=0.5,
                             saturation_high=1.5,
                             hue_delta=25)

        # Normalize. range is [0, 1]
        img /= 255.0

        _, H, W = img.shape
        scale = np.random.uniform(0.25, 2)
        random_expand = np.random.uniform(0.8, 1.2, 2)
        net_h, net_w = self.output_shape
        out_h = net_h * scale  # random_expand[0]
        out_w = net_w * scale  # random_expand[1]
        if H > W:
            out_w = out_h * (float(W) / H) * np.random.uniform(0.8, 1.2)
        elif H < W:
            out_h = out_w * (float(H) / W) * np.random.uniform(0.8, 1.2)

        out_h = int(out_h)
        out_w = int(out_w)

        img = resize_with_random_interpolation(img, (out_h, out_w))
        bbox = transforms.resize_bbox(bbox, (H, W), (out_h, out_w))

        if out_h < net_h and out_w < net_w:
            img, param = expand(img,
                                out_h=net_h,
                                out_w=net_w,
                                fill=self.value,
                                return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])
        else:
            out_h = net_h if net_h > out_h else int(out_h * 1.05)
            out_w = net_w if net_w > out_w else int(out_w * 1.05)
            img, param = expand(img,
                                out_h=out_h,
                                out_w=out_w,
                                fill=self.value,
                                return_param=True)
            bbox = transforms.translate_bbox(bbox,
                                             y_offset=param['y_offset'],
                                             x_offset=param['x_offset'])

            img, param = crop_with_bbox_constraints(img,
                                                    bbox,
                                                    return_param=True,
                                                    crop_height=net_h,
                                                    crop_width=net_w)
            bbox, param = transforms.crop_bbox(bbox,
                                               y_slice=param['y_slice'],
                                               x_slice=param['x_slice'],
                                               allow_outside_center=False,
                                               return_param=True)
            label = label[param['index']]

        # 5. Random horizontal flipping # OK
        img, params = transforms.random_flip(img,
                                             x_random=True,
                                             return_param=True)
        bbox = transforms.flip_bbox(bbox,
                                    self.output_shape,
                                    x_flip=params['x_flip'])

        # Preparation for Yolov2 network. scale=[0, 1]
        bbox[:, ::2] /= self.output_shape[0]  # y
        bbox[:, 1::2] /= self.output_shape[1]  # x

        num_bbox = len(bbox)
        len_max = max(num_bbox, self.max_target)
        out_bbox = np.zeros((len_max, 4), dtype='f')
        out_bbox[:num_bbox] = bbox[:num_bbox]
        out_label = np.zeros((len_max), dtype='i')
        out_label[:num_bbox] = label
        out_bbox = out_bbox[:self.max_target]
        out_label = out_label[:self.max_target]
        num_array = min(num_bbox, self.max_target)

        gmap = create_map_anchor_gt(bbox, self.anchors, self.output_shape,
                                    self.downscale, self.n_boxes, len_max)
        gmap = gmap[:self.max_target]

        img = np.clip(img, 0, 1)
        return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')
コード例 #12
0
 def _data_augumentation(self, image):
     image = random_distort(image)
     image = random_flip(image, x_random=True)
     return image
コード例 #13
0
ファイル: datasets.py プロジェクト: jphacks/TK_1810
    def __getitem__(self, index):
        img_path = self.img_files[index % len(self.img_files)].rstrip()
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        if not os.path.exists(label_path):
            raise Exception(
                "the label file(.txt) is not found corresponding + " +
                img_path)

        labels = np.loadtxt(label_path).reshape(-1, 5)

        #--------------------
        #  data augmentation
        #--------------------
        lx = w * (labels[:, 1] - labels[:, 3] / 2)
        ly = h * (labels[:, 2] - labels[:, 4] / 2)
        bx = w * (labels[:, 1] + labels[:, 3] / 2)
        by = h * (labels[:, 2] + labels[:, 4] / 2)

        # convert to chainercv format: (ly, lx, by, bx)
        cv_bbox = np.stack([ly, lx, by, bx], axis=1)
        cv_labels = labels[:, 0].reshape(-1).astype(np.int)
        cv_img = img.transpose(2, 0, 1)

        # 1. Random distort
        cv_img = random_distort(cv_img)

        # 2. Random cropping
        cv_img, param = random_crop_with_bbox_constraints(cv_img,
                                                          cv_bbox,
                                                          min_scale=0.3,
                                                          return_param=True)
        cv_bbox, param = transforms.crop_bbox(cv_bbox,
                                              y_slice=param['y_slice'],
                                              x_slice=param['x_slice'],
                                              allow_outside_center=False,
                                              return_param=True)
        cv_labels = cv_labels[param['index']]

        # 3. Random horizontal flipping
        _, _h, _w = cv_img.shape
        cv_img, params = transforms.random_flip(cv_img,
                                                x_random=True,
                                                return_param=True)
        cv_bbox = transforms.flip_bbox(cv_bbox, (_h, _w),
                                       x_flip=params['x_flip'])

        # update params
        img = cv_img.transpose(1, 2, 0)
        h, w, _ = img.shape

        # convert to default format: (cx, cy, w, h)
        labels = np.zeros((cv_labels.size, 5))
        labels[:, 0] = cv_labels
        labels[:, 1] = (cv_bbox[:, 3] + cv_bbox[:, 1]) / 2.0 / w  # cx
        labels[:, 2] = (cv_bbox[:, 2] + cv_bbox[:, 0]) / 2.0 / h  # cy
        labels[:, 3] = (cv_bbox[:, 3] - cv_bbox[:, 1]) / w  # w
        labels[:, 4] = (cv_bbox[:, 2] - cv_bbox[:, 0]) / h  # x

        #---------
        #  image
        #---------
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0),
               (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
        padded_h, padded_w, _ = input_img.shape
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        #---------
        #  Label
        #---------
        # Extract coordinates for unpadded + unscaled image
        x1 = w * (labels[:, 1] - labels[:, 3] / 2)
        y1 = h * (labels[:, 2] - labels[:, 4] / 2)
        x2 = w * (labels[:, 1] + labels[:, 3] / 2)
        y2 = h * (labels[:, 2] + labels[:, 4] / 2)
        # Adjust for added padding
        x1 += pad[1][0]
        y1 += pad[0][0]
        x2 += pad[1][0]
        y2 += pad[0][0]
        # Calculate ratios from coordinates
        labels[:, 1] = ((x1 + x2) / 2) / padded_w
        labels[:, 2] = ((y1 + y2) / 2) / padded_h
        labels[:, 3] *= w / padded_w
        labels[:, 4] *= h / padded_h

        # Fill matrix
        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            filled_labels[range(
                len(labels))[:self.max_objects]] = labels[:self.max_objects]
        filled_labels = torch.from_numpy(filled_labels)

        return img_path, input_img, filled_labels
コード例 #14
0
    def __call__(self, in_data):
        # There are five data augmentation steps
        # 1. Color augmentation
        # 2. Random expansion
        # 3. Random cropping
        # 4. Resizing with random interpolation
        # 5. Random horizontal flipping

        # mask = None
        img, bbox, label, mask = in_data

        # TODO: show information

        # self._show_img(img)
        # self._show_mask(mask)
        # 1. Color augmentation
        img = random_distort(img)

        # self._show_img(img)
        # 2. Random expansion
        if np.random.randint(2):
            img, param = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
            if mask is not None:
                _, new_height, new_width = img.shape
                param['new_height'] = new_height
                param['new_width'] = new_width
                mask = self._random_expand_mask(mask, param)

        # self._show_img(img)
        # self._show_mask(mask)

        # 3. Random cropping
        img, param = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)

        # self._show_img(img)

        mask = self._fixed_crop_mask(mask, param['y_slice'], param['x_slice'])

        # self._show_mask(mask)

        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        label = label[param['index']]

        # 4. Resizing with random interpolatation
        _, H, W = img.shape
        img = resize_with_random_interpolation(img, (self.size, self.size))

        # self._show_img(img)

        if mask is not None:
            if mask.size == 0:
                raise RuntimeError
            mask = self._resize_with_nearest(mask, (self.size, self.size))

        # self._show_mask(mask)

        bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

        # 5. Random horizontal flipping
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (self.size, self.size), x_flip=params['x_flip'])
        if mask is not None:
            mask = self._random_flip_mask(mask, x_flip=params['x_flip'], y_flip=params['y_flip'])

        # self._show_img(img)
        # self._show_mask(mask)

        # Preparation for SSD network
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)
        if mask is None:
            mask = np.ones([self.size, self.size], dtype=np.int32) * -1
        # print("Dtype is :"+str(mask.dtype))
        data_type = str(mask.dtype)
        target_type = 'int32'
        if data_type != target_type:
            mask = mask.astype(np.int32)
        if img is None:
            raise RuntimeError
        return img, mb_loc, mb_label, mask