def __call__(self, in_data):
    """Distort, rotate, resize and randomly flip one detection example.

    Args:
        in_data: An ``(img, bbox, label)`` tuple. ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, bbox, label, scale)`` where ``scale`` is the height
        after ``self.faster_rcnn.prepare`` divided by the height of the
        rotated image.
    """
    image, boxes, label = in_data
    _, src_h, src_w = image.shape

    # Random brightness and contrast.
    image = random_distort(image)

    # Random rotation. The returned parameter dict carries ``k`` (the number
    # of 90-degree rotations), which is forwarded to rotate_bbox together
    # with the pre-rotation size.
    image, rotation = transforms.random_rotate(image, return_param=True)
    _, rot_h, rot_w = image.shape
    boxes = rotate_bbox(boxes, (src_h, src_w), rotation['k'])

    # Let the detector resize the image, then rescale the boxes to match.
    image = self.faster_rcnn.prepare(image)
    _, out_h, out_w = image.shape
    boxes = transforms.resize_bbox(boxes, (rot_h, rot_w), (out_h, out_w))

    # Random horizontal and vertical flips, mirrored onto the boxes.
    image, flip = transforms.random_flip(
        image, x_random=True, y_random=True, return_param=True)
    boxes = transforms.flip_bbox(
        boxes, (out_h, out_w),
        x_flip=flip['x_flip'], y_flip=flip['y_flip'])

    return image, boxes, label, out_h / rot_h
def __call__(self, in_data):
    """Augment one example and convert its boxes into per-object dicts.

    Steps (numbering kept from the original comments):
      3. Random cropping (optional)
      4. Resizing to ``(self.size, self.size)``
      5. Random horizontal flipping (optional)
    followed by in-place normalisation with ``self.mean`` / ``self.std``.

    Args:
        in_data: An ``(img, bbox, label)`` tuple. ``bbox`` columns are read
            as ``(ymin, xmin, ymax, xmax)``.

    Returns:
        tuple: ``(img, t)`` where ``t`` is a list with one dict per box,
        holding ``label``, centre ``x``/``y`` and size ``w``/``h`` divided by
        the image width/height, and ``one_hot_label``.
    """
    img, bbox, label = in_data

    # 3. Random cropping: attempted only when enabled and the random draw
    # exceeds 0.5.
    if self.random_crop:
        if np.random.rand() > 0.5:
            cropped_img, crop = random_crop_with_bbox_constraints(
                img, bbox,
                min_scale=min(self.crop_rate),
                max_scale=max(self.crop_rate),
                return_param=True)
            cropped_bbox, crop = transforms.crop_bbox(
                bbox,
                y_slice=crop['y_slice'], x_slice=crop['x_slice'],
                allow_outside_center=False, return_param=True)
            surviving = label[crop['index']]
            # Accept the crop only if at least one box survived it.
            if len(surviving) != 0:
                label = surviving
                img, bbox = cropped_img, cropped_bbox

    # 4. Resizing
    _, src_h, src_w = img.shape
    target = (self.size, self.size)
    img = transforms.resize(img, target)
    bbox = transforms.resize_bbox(bbox, (src_h, src_w), target)

    # 5. Random horizontal flipping
    if self.flip:
        img, flip = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, target, x_flip=flip['x_flip'])

    # Normalise in place.
    img -= self.mean
    img /= self.std

    _, height, width = img.shape
    ymin, xmin, ymax, xmax = bbox[:, 0], bbox[:, 1], bbox[:, 2], bbox[:, 3]
    one_hot_label = np.eye(self.n_class)[label]

    # Box centres use floor division of the box extent, as in the original.
    xs = (xmin + (xmax - xmin) // 2) / width
    ws = (xmax - xmin) / width
    ys = (ymin + (ymax - ymin) // 2) / height
    hs = (ymax - ymin) / height

    targets = []
    for cls, x, w, y, h, hot in zip(label, xs, ws, ys, hs, one_hot_label):
        targets.append({
            'label': cls,
            'x': x,
            'w': w,
            'y': y,
            'h': h,
            'one_hot_label': hot,
        })
    return img, targets
def __call__(self, in_data):
    """Apply the training augmentation and encode the targets.

    Args:
        in_data: An ``(img, bbox, label)`` tuple.

    Returns:
        tuple: ``(img, mb_loc, mb_lab)``; the last two come from
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data

    # 1. Color augmentation
    image = random_distort(image)

    # 2. Random expansion, taken when the draw from {0, 1} is 1
    if np.random.randint(2):
        image, expand = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand["y_offset"], x_offset=expand["x_offset"])

    # 3. Random cropping; labels follow the boxes that crop_bbox kept
    image, crop = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop["y_slice"], x_slice=crop["x_slice"],
        allow_outside_center=False, return_param=True)
    labels = labels[kept["index"]]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = image.shape
    out_size = (self.size, self.size)
    image = resize_with_random_interpolation(image, out_size)
    boxes = transforms.resize_bbox(boxes, (crop_h, crop_w), out_size)

    # 5. Transformation for SSD network input
    image -= self.mean
    mb_loc, mb_lab = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_lab
def predict(self, imgs, k=100, detail=False, output_index=-1):
    """Detect objects in a batch of images.

    Args:
        imgs: Iterable of 3-D image arrays; the last two axes are unpacked
            as height and width.
        k (int): Forwarded to ``self._decode_output``.
        detail (bool): If true, the selected raw network output is returned
            as a fourth element.
        output_index (int): Index into the value returned by
            ``self.forward``.

    Returns:
        tuple: ``(bboxes, labels, scores)``, each a list with one entry per
        image, plus ``output`` when ``detail`` is true. Boxes are resized
        from ``(self.insize, self.insize)`` back to each image's size.
    """
    x = []
    sizes = []
    for img in imgs:
        _, H, W = img.shape
        img = self._prepare(img)
        x.append(self.xp.array(img))
        sizes.append((H, W))

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        x = Variable(self.xp.stack(x))
        output = self.forward(x)[output_index]
        output['hm'] = F.sigmoid(output['hm'])
    output['hm'].to_cpu()

    bboxes = []
    labels = []
    scores = []
    # Iterate over the recorded sizes rather than ``range(len(imgs))``:
    # ``imgs`` may be a generator, which has no len() and is already consumed.
    for i, size in enumerate(sizes):
        bbox, label, score = self._decode_output(output, i, k)
        bbox = transforms.resize_bbox(
            bbox, (self.insize, self.insize), size)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    if detail:
        return bboxes, labels, scores, output
    return bboxes, labels, scores
def predict(self, imgs):
    """Detect objects in a batch of images.

    Args:
        imgs: Iterable of 3-D image arrays; the last two axes are unpacked
            as height and width.

    Returns:
        tuple: ``(bboxes, labels, scores)``, each a list with one CPU array
        per image. Boxes are resized from ``(self.insize, self.insize)``
        back to the original image size.
    """
    batch, sizes = [], []
    for image in imgs:
        _, height, width = image.shape
        sizes.append((height, width))
        batch.append(self.xp.array(self._prepare(image)))

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        batch = chainer.Variable(self.xp.stack(batch))
        arm_locs, arm_confs, odm_locs, odm_confs = self(batch)
    arm_locs, arm_confs = arm_locs.array, arm_confs.array
    odm_locs, odm_confs = odm_locs.array, odm_confs.array

    net_size = (self.insize, self.insize)
    bboxes, labels, scores = [], [], []
    per_image = zip(arm_locs, arm_confs, odm_locs, odm_confs, sizes)
    for arm_loc, arm_conf, odm_loc, odm_conf, size in per_image:
        bbox, label, score = self.coder.decode(
            arm_loc, arm_conf, odm_loc, odm_conf,
            self.nms_thresh, self.score_thresh)
        bbox = transforms.resize_bbox(bbox, net_size, size)
        bboxes.append(chainer.cuda.to_cpu(bbox))
        labels.append(chainer.cuda.to_cpu(label))
        scores.append(chainer.cuda.to_cpu(score))

    return bboxes, labels, scores
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` example and encode it.

    Args:
        in_data: An ``(img, bbox, label)`` tuple.

    Returns:
        tuple: ``(img, mb_loc, mb_lab)``; the last two are produced by
        ``self.coder.encode``.
    """
    pixels, bbox, label = in_data

    # 1. Color augmentation
    pixels = random_distort(pixels)

    # 2. Random expansion (skipped when the {0, 1} draw is 0)
    do_expand = np.random.randint(2)
    if do_expand:
        pixels, offsets = transforms.random_expand(
            pixels, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=offsets["y_offset"], x_offset=offsets["x_offset"])

    # 3. Random cropping; keep only the labels of the boxes that remain
    pixels, window = random_crop_with_bbox_constraints(
        pixels, bbox, return_param=True)
    bbox, survivors = transforms.crop_bbox(
        bbox,
        y_slice=window["y_slice"], x_slice=window["x_slice"],
        allow_outside_center=False, return_param=True)
    label = label[survivors["index"]]

    # 4. Resizing with random interpolation
    _, h_in, w_in = pixels.shape
    side = (self.size, self.size)
    pixels = resize_with_random_interpolation(pixels, side)
    bbox = transforms.resize_bbox(bbox, (h_in, w_in), side)

    # 5. Transformation for SSD network input
    pixels -= self.mean
    mb_loc, mb_lab = self.coder.encode(bbox, label)
    return pixels, mb_loc, mb_lab
def __call__(self, in_data):
    """Prepare one example with instance masks and two label maps.

    ``img`` and ``lbl_occ`` are always transposed from HWC to CHW. When
    ``self.train`` is false the example is returned right after that.
    Otherwise the image goes through ``self.mask_rcnn.prepare``; boxes,
    masks and label maps are resized to the scaled size, masks and label
    maps are padded with ``-1`` via ``pad_multiple_of``, and instances
    whose mask contains no pixel equal to 1 are dropped. No random flip is
    applied.

    Args:
        in_data: A 6-tuple ``(img, bbox, label, mask, lbl_vis, lbl_occ)``.

    Returns:
        tuple: The 6-tuple in evaluation mode, or
        ``(img, bbox, label, mask, scale, lbl_vis, lbl_occ)`` in training
        mode.
    """
    assert len(in_data) == 6
    img, bbox, label, mask, lbl_vis, lbl_occ = in_data

    # H, W, C -> C, H, W
    img = img.transpose(2, 0, 1)
    lbl_occ = lbl_occ.transpose(2, 0, 1)

    if not self.train:
        return img, bbox, label, mask, lbl_vis, lbl_occ

    def pad_chw(arr):
        # pad_multiple_of is applied to the HWC view, then converted back.
        hwc = arr.transpose(1, 2, 0)
        hwc = pad_multiple_of(hwc, mode='constant', constant_values=-1)
        return hwc.transpose(2, 0, 1)

    imgs, sizes, scales = self.mask_rcnn.prepare([img])
    img, (H, W), scale = imgs[0], sizes[0], scales[0]
    # The scaled size is derived from the scale, not read from img.shape.
    scaled = (int(round(scale * H)), int(round(scale * W)))

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (H, W), scaled)

    if len(mask) > 0:
        mask = transforms.resize(mask, size=scaled, interpolation=0)
        mask = pad_chw(mask)
        assert mask.shape[1:] == img.shape[1:]

    lbl_vis = transforms.resize(
        lbl_vis[None], size=scaled, interpolation=0)[0]
    lbl_occ = transforms.resize(lbl_occ, size=scaled, interpolation=0)
    lbl_vis = pad_multiple_of(lbl_vis, mode='constant', constant_values=-1)
    lbl_occ = pad_chw(lbl_occ)
    assert lbl_vis.shape == img.shape[1:]
    assert lbl_occ.shape[1:] == img.shape[1:]

    # Drop instances whose mask has no pixel equal to 1.
    keep = (mask == 1).sum(axis=(1, 2)) > 0
    bbox, label, mask = bbox[keep], label[keep], mask[keep]

    return img, bbox, label, mask, scale, lbl_vis, lbl_occ
def __call__(self, in_data):
    """Augment one example in six steps and encode it.

    Steps:
      1. Color augmentation
      2. Random expansion
      3. Random cropping
      4. Resizing with random interpolation
      5. Random horizontal flipping
      6. Random vertical flipping

    Args:
        in_data: An ``(img, bbox, label)`` tuple.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``; the last two come from
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data

    # 1. Color augmentation
    image = random_distort(image)

    # 2. Random expansion
    if np.random.randint(2):
        image, expand = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand['y_offset'], x_offset=expand['x_offset'])

    # 3. Random cropping
    image, crop = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop['y_slice'], x_slice=crop['x_slice'],
        allow_outside_center=False, return_param=True)
    labels = labels[kept['index']]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = image.shape
    square = (self.size, self.size)
    image = resize_with_random_interpolation(image, square)
    boxes = transforms.resize_bbox(boxes, (crop_h, crop_w), square)

    # 5. Random horizontal flipping
    image, h_flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, square, x_flip=h_flip['x_flip'])

    # 6. Random vertical flipping (a separate random draw)
    image, v_flip = transforms.random_flip(
        image, y_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, square, y_flip=v_flip['y_flip'])

    # Preparation for SSD network
    image -= self.mean
    mb_loc, mb_label = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_label
def predict(self, imgs):
    """Detect objects from images.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images. All
            images are in CHW and RGB format and the range of their
            value is [0, 255].

    Returns:
        tuple of lists: ``(bboxes, labels, scores)``.

        * **bboxes**: A list of float arrays of shape (R, 4), where R is
          the number of bounding boxes in an image. Each bounding box is
          organized by (y_min, x_min, y_max, x_max) in the second axis.
        * **labels**: A list of integer arrays of shape (R,). Values are
          in range [0, L - 1], where L is the number of foreground
          classes.
        * **scores**: A list of float arrays of shape (R,). Each value
          indicates how confident the prediction is.
    """
    batch, sizes = [], []
    for image in imgs:
        _, height, width = image.shape
        sizes.append((height, width))
        batch.append(self.xp.array(self._prepare(image)))

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        batch = chainer.Variable(self.xp.stack(batch))
        mb_locs, mb_confs = self.forward(batch)
    mb_locs, mb_confs = mb_locs.array, mb_confs.array

    net_size = (self.insize, self.insize)
    bboxes, labels, scores = [], [], []
    for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
        bbox, label, score = self.coder.decode(
            mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
        # Map boxes from network input coordinates back to the image.
        bbox = transforms.resize_bbox(bbox, net_size, size)
        bboxes.append(chainer.backends.cuda.to_cpu(bbox))
        labels.append(chainer.backends.cuda.to_cpu(label))
        scores.append(chainer.backends.cuda.to_cpu(score))

    return bboxes, labels, scores
def __call__(self, in_data):
    """Prepare one instance-segmentation example, with random flips.

    Args:
        in_data: Either ``(img, bbox, label, mask)`` or
            ``(img, bbox, label, mask, crowd, area)``; ``img`` is HWC.

    Returns:
        tuple: In evaluation mode, the input fields with ``img``
        transposed to CHW. In training mode,
        ``(img, bbox, label, mask, scale, sizes)``.

    Raises:
        ValueError: If ``in_data`` has neither 4 nor 6 elements.
    """
    n_fields = len(in_data)
    if n_fields not in (4, 6):
        raise ValueError
    img, bbox, label, mask = in_data[:4]

    img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

    if not self.train:
        # Pass every remaining field through unchanged.
        return (img,) + tuple(in_data[1:])

    imgs, sizes, scales = self.mask_rcnn.prepare([img])
    img = imgs[0]
    H, W = sizes[0]
    scale = scales[0]
    _, o_H, o_W = img.shape
    out_size = (o_H, o_W)

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (H, W), out_size)
    if len(mask) > 0:
        mask = transforms.resize(mask, size=out_size, interpolation=0)

    # Random horizontal and vertical flip, mirrored on boxes and masks.
    img, flip = transforms.random_flip(
        img, y_random=True, x_random=True, return_param=True)
    y_flip, x_flip = flip['y_flip'], flip['x_flip']
    bbox = transforms.flip_bbox(
        bbox, out_size, y_flip=y_flip, x_flip=x_flip)
    if mask.ndim == 2:
        # A 2-D mask gets a leading axis just for the flip call.
        mask = transforms.flip(
            mask[None, :, :], y_flip=y_flip, x_flip=x_flip)[0]
    else:
        mask = transforms.flip(mask, y_flip=y_flip, x_flip=x_flip)

    return img, bbox, label, mask, scale, sizes
def __call__(self, in_data):
    """Augment one example in five steps and encode it for SSD.

    Steps:
      1. Color augmentation
      2. Random expansion
      3. Random cropping
      4. Resizing with random interpolation
      5. Random horizontal flipping

    Args:
        in_data: An ``(img, bbox, label)`` tuple.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``; the last two come from
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data

    # 1. Color augmentation: combines brightness, contrast, saturation
    #    and hue changes.
    image = random_distort(image)

    # 2. Random expansion: place the image on a canvas and shift the
    #    bounding boxes by the same offsets.
    if np.random.randint(2):
        image, expand = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand['y_offset'], x_offset=expand['x_offset'])

    # 3. Random cropping, then adjust the bounding boxes to the crop.
    image, crop = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop['y_slice'], x_slice=crop['x_slice'],
        allow_outside_center=False, return_param=True)
    labels = labels[kept['index']]

    # 4. Resize the image (random interpolation) and the bounding boxes.
    _, crop_h, crop_w = image.shape
    square = (self.size, self.size)
    image = resize_with_random_interpolation(image, square)
    boxes = transforms.resize_bbox(boxes, (crop_h, crop_w), square)

    # 5. Randomly flip the image and bounding boxes horizontally.
    image, flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, square, x_flip=flip['x_flip'])

    # Preparation for the SSD network input: subtract the mean, then
    # encode boxes and labels into mb_loc (offsets and scales relative to
    # the default bounding boxes) and mb_label (array of classes).
    image -= self.mean
    mb_loc, mb_label = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_label
def test_resize_bbox(self):
    """resize_bbox scales the four coordinate columns and nothing else.

    Resizing from (32, 32) to (64, 128) is expected to multiply columns
    0 and 2 by 4 and columns 1 and 3 by 2; the fifth column must stay
    untouched.
    """
    boxes = np.random.uniform(low=0., high=32., size=(10, 5))
    resized = resize_bbox(
        boxes, input_shape=(32, 32), output_shape=(64, 128))

    expected = boxes.copy()
    for column, factor in ((0, 4), (1, 2), (2, 4), (3, 2)):
        expected[:, column] = boxes[:, column] * factor

    np.testing.assert_equal(resized, expected)
def predict(self, imgs):
    """Detect objects from images.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images. All
            images are in CHW and RGB format and the range of their
            value is [0, 255].

    Returns:
        tuple of lists: ``(bboxes, labels, scores)``.

        * **bboxes**: A list of float arrays of shape (R, 4), where R is
          the number of bounding boxes in an image. Each bounding box is
          organized by (y_min, x_min, y_max, x_max) in the second axis.
        * **labels**: A list of integer arrays of shape (R,). Values are
          in range [0, L - 1], where L is the number of foreground
          classes.
        * **scores**: A list of float arrays of shape (R,). Each value
          indicates how confident the prediction is.
    """
    batch, sizes = [], []
    for image in imgs:
        _, height, width = image.shape
        sizes.append((height, width))
        batch.append(self.xp.array(self._prepare(image)))

    with chainer.function.no_backprop_mode():
        batch = chainer.Variable(self.xp.stack(batch))
        loc, conf = self(batch)
    raw_bboxes, raw_scores = self._decode(loc.data, conf.data)

    bboxes, labels, scores = [], [], []
    for raw_bbox, raw_score, size in zip(raw_bboxes, raw_scores, sizes):
        # Decoded boxes are resized from a (1, 1) frame to the image size
        # before suppression.
        raw_bbox = transforms.resize_bbox(raw_bbox, (1, 1), size)
        bbox, label, score = self._suppress(raw_bbox, raw_score)
        bboxes.append(chainer.cuda.to_cpu(bbox))
        labels.append(chainer.cuda.to_cpu(label))
        scores.append(chainer.cuda.to_cpu(score))

    return bboxes, labels, scores
def __call__(self, in_data):
    """Resize an example to the detector's input size.

    Args:
        in_data: An ``(img, bbox, label, label_img)`` tuple. ``img`` is a
            3-D array whose last two axes are unpacked as height and
            width; ``label_img`` is iterated as a sequence of 2-D images.

    Returns:
        tuple: ``(img, bbox, label, label_img, scale)`` where ``scale`` is
        the prepared height divided by the original height and
        ``label_img`` is a new list of label images resized with
        nearest-neighbour interpolation.
    """
    img, bbox, label, label_img = in_data
    _, H, W = img.shape
    img = self.faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    scale = o_H / H
    bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

    # Build a new list instead of assigning into ``label_img``: writing the
    # resized images back would modify the caller's (dataset's) container.
    # Note that cv2.resize takes its size as (width, height).
    label_img = [
        cv2.resize(im, (o_W, o_H), interpolation=cv2.INTER_NEAREST)
        for im in label_img
    ]
    return img, bbox, label, label_img, scale
def test_resize_bbox(self):
    """resize_bbox scales columns 0/2 and 1/3 by the per-axis ratios.

    The output size doubles the first axis and quadruples the second, so
    columns 0 and 2 are expected to double and columns 1 and 3 to
    quadruple.
    """
    in_size = (32, 24)
    first_ratio, second_ratio = 2, 4
    out_size = (in_size[0] * first_ratio, in_size[1] * second_ratio)

    boxes = generate_random_bbox(10, in_size, 0, min(in_size))
    resized = resize_bbox(boxes, in_size=in_size, out_size=out_size)

    expected = boxes.copy()
    for column in (0, 2):
        expected[:, column] = boxes[:, column] * first_ratio
    for column in (1, 3):
        expected[:, column] = boxes[:, column] * second_ratio

    np.testing.assert_equal(resized, expected)
def __call__(self, in_data):
    """Preprocess an example and randomly flip it horizontally.

    Args:
        in_data: An ``(img, bbox, label)`` tuple; ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, bbox, label, [scale, scale])`` where ``scale`` is
        the preprocessed height divided by the original height.
    """
    image, boxes, label = in_data
    _, src_h, src_w = image.shape

    image = preprocess(image, self.min_size, self.max_size)
    _, out_h, out_w = image.shape
    out_size = (out_h, out_w)
    ratio = out_h / src_h
    boxes = resize_bbox(boxes, (src_h, src_w), out_size)

    # Horizontal flip, mirrored on the boxes.
    image, flip = random_flip(image, x_random=True, return_param=True)
    boxes = flip_bbox(boxes, out_size, x_flip=flip['x_flip'])

    return image, boxes, label, [ratio, ratio]
def __call__(self, in_data):
    """Resize an example for the detector and randomly flip it.

    Args:
        in_data: An ``(img, bbox, label)`` tuple; ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, bbox, label, scale)`` where ``scale`` is the
        prepared height divided by the original height.
    """
    image, boxes, label = in_data
    _, src_h, src_w = image.shape

    image = self.faster_rcnn.prepare(image)
    _, out_h, out_w = image.shape
    out_size = (out_h, out_w)
    ratio = out_h / src_h
    boxes = transforms.resize_bbox(boxes, (src_h, src_w), out_size)

    # Horizontal flip, mirrored on the boxes.
    image, flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, out_size, x_flip=flip["x_flip"])

    return image, boxes, label, ratio
def demo(self, imgs, detection=True, segmentation=True):
    """Run the detection and/or segmentation heads on a batch of images.

    A head is evaluated only when it is both requested by the caller and
    enabled on the model (``self.detection`` / ``self.segmentation``).
    Previously only segmentation was gated this way; the detection check
    was a no-op, so detection results were decoded even for a model with
    detection disabled.

    Args:
        imgs: Iterable of 3-D image arrays; the last two axes are unpacked
            as height and width.
        detection (bool): Request detection results.
        segmentation (bool): Request segmentation masks.

    Returns:
        tuple: ``(bboxes, labels, scores, masks)``, four lists of CPU
        arrays. Lists belonging to a head that did not run are empty.
    """
    run_detection = bool(detection and self.detection)
    run_segmentation = bool(segmentation and self.segmentation)

    x = []
    sizes = []
    for img in imgs:
        _, H, W = img.shape
        img = self._prepare(img)
        x.append(self.xp.array(img))
        sizes.append((H, W))

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        x = chainer.Variable(self.xp.stack(x))
        result_detection, result_segmentation = self(x)

    bboxes = []
    labels = []
    scores = []
    masks = []

    if run_detection:
        mb_locs, mb_confs = result_detection
        mb_locs, mb_confs = mb_locs.array, mb_confs.array
        for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
            bbox, label, score = self.coder.decode(
                mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
            # Map boxes from network input coordinates back to the image.
            bbox = transforms.resize_bbox(
                bbox, (self.insize, self.insize), size)
            bboxes.append(chainer.backends.cuda.to_cpu(bbox))
            labels.append(chainer.backends.cuda.to_cpu(label))
            scores.append(chainer.backends.cuda.to_cpu(score))

    if run_segmentation:
        # Per-pixel class index: argmax over axis 1 of the network output.
        mask = F.argmax(result_segmentation, axis=1).array
        for i, size in enumerate(sizes):
            mask_ = mask_resize_with_nearest(mask[i, :, :], size)
            masks.append(chainer.backends.cuda.to_cpu(mask_))

    return bboxes, labels, scores, masks
def transform(in_data):
    """Resize an example for ``faster_rcnn`` and randomly flip it.

    Sizes are passed to the bounding-box helpers in (width, height)
    order, and the flip flag is passed positionally, exactly as in the
    original code. NOTE(review): this matches an API whose sizes are
    (W, H) -- confirm against the installed library version.

    Args:
        in_data: An ``(img, bbox, label)`` tuple; ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, bbox, label, scale)`` where ``scale`` is the
        prepared height divided by the original height.
    """
    image, boxes, label = in_data
    _, src_h, src_w = image.shape

    image = faster_rcnn.prepare(image)
    _, out_h, out_w = image.shape
    ratio = out_h / src_h
    boxes = transforms.resize_bbox(boxes, (src_w, src_h), (out_w, out_h))

    # Horizontal flip, mirrored on the boxes.
    image, flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, (out_w, out_h), flip['x_flip'])

    return image, boxes, label, ratio
def __call__(self, in_data):
    """Augment one example in five steps and encode it.

    Steps:
      1. Color augmentation
      2. Random expansion
      3. Random cropping
      4. Resizing with random interpolation
      5. Random horizontal flipping

    An example without any bounding box skips the augmentation: a
    ``RuntimeWarning`` is issued and the image is only resized.

    Args:
        in_data: An ``(img, bbox, label)`` tuple. ``bbox`` is converted
            to a float32 array.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``; the last two come from
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data
    boxes = np.array(boxes).astype(np.float32)
    square = (self.size, self.size)

    if len(boxes) == 0:
        warnings.warn("No bounding box detected", RuntimeWarning)
        image = resize_with_random_interpolation(image, square)
        mb_loc, mb_label = self.coder.encode(boxes, labels)
        return image, mb_loc, mb_label

    # 1. Color augmentation
    image = random_distort(image)

    # 2. Random expansion
    if np.random.randint(2):
        image, expand = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand['y_offset'], x_offset=expand['x_offset'])

    # 3. Random cropping
    image, crop = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop['y_slice'], x_slice=crop['x_slice'],
        allow_outside_center=False, return_param=True)
    labels = labels[kept['index']]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = image.shape
    image = resize_with_random_interpolation(image, square)
    boxes = transforms.resize_bbox(boxes, (crop_h, crop_w), square)

    # 5. Random horizontal flipping
    image, flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, square, x_flip=flip['x_flip'])

    mb_loc, mb_label = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_label
def __call__(self, in_data):
    """Resize an example for the detector and randomly flip it.

    Args:
        in_data: An ``(img, bbox, label)`` tuple; ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, bbox, label, scale)`` where ``scale`` is the
        prepared height divided by the original height.
    """
    picture, bbox, label = in_data
    _, h_before, w_before = picture.shape

    picture = self.faster_rcnn.prepare(picture)
    _, h_after, w_after = picture.shape
    after = (h_after, w_after)
    scale = h_after / h_before
    bbox = transforms.resize_bbox(bbox, (h_before, w_before), after)

    # Horizontal flip, mirrored on the boxes.
    picture, flip_param = transforms.random_flip(
        picture, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, after, x_flip=flip_param['x_flip'])

    return picture, bbox, label, scale
def __call__(self, in_data):
    """Resize an example with keypoints to the detector's input size.

    Args:
        in_data: An ``(img, bbox, keypoints)`` tuple. Per the original
            comment, ``keypoints`` has shape (N, 17, 3): N boxes, 17
            keypoints, and (x, y, v) with v=0 unlabeled, v=1 labeled but
            invisible, v=2 labeled and visible.

    Returns:
        tuple: ``(img, bbox, label, kp, scale)``. ``label`` is an int32
        zero vector with one entry per box. ``kp`` holds the first two
        keypoint components swapped (index 1 first, then index 0) and
        multiplied by ``scale``, followed by the unscaled third
        component.
    """
    image, boxes, keypoints = in_data
    _, src_h, src_w = image.shape

    image = self.faster_rcnn.prepare(image)
    _, out_h, out_w = image.shape
    ratio = out_h / src_h
    boxes = transforms.resize_bbox(boxes, (src_h, src_w), (out_h, out_w))

    # Every box gets class 0.
    label = np.zeros(boxes.shape[0], dtype=np.int32)

    points = keypoints.astype(np.float32)
    swapped_coords = points[:, :, [1, 0]] * ratio
    visibility = points[:, :, 2, None]
    kp = np.concatenate([swapped_coords, visibility], axis=2)

    return image, boxes, label, kp, ratio
def __call__(self, in_data):
    """Derive boxes from masks, resize the example and randomly flip it.

    Args:
        in_data: An ``(img, mask, label)`` tuple; ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, mask, label, bbox, scale)`` where ``scale`` is the
        prepared height divided by the original height. ``mask`` is
        returned as the float32 result of ``transforms.resize``.
    """
    image, mask, label = in_data
    boxes = mask_to_bbox(mask)

    _, orig_h, orig_w = image.shape
    image = self.fcis.prepare(image)
    _, new_h, new_w = image.shape
    new_size = (new_h, new_w)
    ratio = new_h / orig_h

    mask = transforms.resize(mask.astype(np.float32), new_size)
    boxes = transforms.resize_bbox(boxes, (orig_h, orig_w), new_size)

    # One horizontal-flip decision shared by image, masks and boxes.
    image, flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    x_flip = flip['x_flip']
    mask = transforms.flip(mask, x_flip=x_flip)
    boxes = transforms.flip_bbox(boxes, new_size, x_flip=x_flip)

    return image, mask, label, boxes, ratio
def get_example(self, i):
    """Returns the i-th example.

    The image is read from ``<data_dir>/images/<name>``. Channels 1 and 2
    are then replaced by processed versions of themselves (adaptive
    histogram equalisation and Gaussian filtering respectively, each
    rescaled to the range 0-255). Annotations are read from
    ``<data_dir>/bounding_boxes/<stem>.txt``, where each row is read as a
    label followed by four box values.

    Args:
        i (int): The index of the example.

    Returns:
        tuple: ``(img, bbs, label)``. ``bbs`` is a float32 array of shape
        (m, 4) and ``label`` an int32 array of shape (m,), where m is the
        number of rows in the annotation file. When ``self.resize`` is
        set and the image is not already ``img_size`` square, image and
        boxes are resized to ``(self.img_size, self.img_size)``.
    """
    img = utils.read_image(
        os.path.join(self.data_dir, 'images', self.images[i]), color=True)

    # Add processing to the other two channels; warnings raised by the
    # image-processing calls are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        img[1, :, :] = exposure.rescale_intensity(
            exposure.equalize_adapthist(
                exposure.rescale_intensity(img[1, :, :])),
            out_range=(0, 255))
        img[2, :, :] = exposure.rescale_intensity(
            filters.gaussian(exposure.rescale_intensity(img[2, :, :])),
            out_range=(0, 255))

    # Use splitext instead of slicing off four characters so extensions
    # of any length (e.g. ".jpeg") map to the right annotation file.
    stem = os.path.splitext(self.images[i])[0]
    bbs_file = os.path.join(self.data_dir, 'bounding_boxes', stem + '.txt')

    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column indexing below raises IndexError.
    label_bbs = np.loadtxt(bbs_file, dtype=np.float32, ndmin=2)
    if label_bbs.size == 0:
        # Empty annotation file: no boxes.
        label_bbs = np.zeros((0, 5), dtype=np.float32)
    label = label_bbs[:, 0].astype(np.int32)
    bbs = label_bbs[:, 1:5]

    _, H, W = img.shape
    if self.resize and (H != self.img_size or W != self.img_size):
        img = transforms.resize(img, (self.img_size, self.img_size))
        bbs = transforms.resize_bbox(
            bbs, (H, W), (self.img_size, self.img_size))
    return img, bbs, label
def transform(in_data):
    """Mean-subtract, resize and randomly flip one ``(img, bbox)`` example.

    Args:
        in_data: An ``(img, bbox)`` tuple. ``img`` must have three
            entries along its first axis to match the per-channel mean.

    Returns:
        tuple: ``(img, bbox)`` after processing. The caller's image array
        is left unmodified.
    """
    img, bbox = in_data

    # Copy before the in-place subtraction: ``in_data`` may reference an
    # array owned by the dataset, and mutating it would subtract the mean
    # again every time the same example is fetched.
    img = img.copy()
    img -= np.array([103.939, 116.779, 123.68])[:, None, None]

    # Resize the image and bounding boxes to the shape chosen by
    # _shape_soft_min_hard_max (presumably: shorter edge at least 600,
    # longer edge at most 1200 -- confirm against that helper).
    input_shape = img.shape[1:]
    output_shape = _shape_soft_min_hard_max(input_shape, 600, 1200)
    img = transforms.resize(img, output_shape)
    bbox = transforms.resize_bbox(bbox, input_shape, output_shape)

    # Horizontal flip, mirrored on the boxes.
    img, flips = transforms.random_flip(
        img, horizontal_flip=True, return_flip=True)
    h_flip = flips['h']
    bbox = transforms.flip_bbox(bbox, output_shape, h_flip)

    return img, bbox
def __call__(self, in_data):
    """Augment one example in five steps and encode it for SSD.

    Steps:
      1. Color augmentation
      2. Random expansion
      3. Random cropping
      4. Resizing with random interpolation
      5. Random horizontal flipping

    Args:
        in_data: An ``(img, bbox, label)`` tuple.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``; the last two come from
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data

    # 1. Color augmentation
    image = random_distort(image)

    # 2. Random expansion
    if np.random.randint(2):
        image, expand = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand['y_offset'], x_offset=expand['x_offset'])

    # 3. Random cropping
    image, crop = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop['y_slice'], x_slice=crop['x_slice'],
        allow_outside_center=False, return_param=True)
    labels = labels[kept['index']]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = image.shape
    square = (self.size, self.size)
    image = resize_with_random_interpolation(image, square)
    boxes = transforms.resize_bbox(boxes, (crop_h, crop_w), square)

    # 5. Random horizontal flipping
    image, flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, square, x_flip=flip['x_flip'])

    # Preparation for SSD network
    image -= self.mean
    mb_loc, mb_label = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_label
def __call__(self, in_data):
    """Distort, resize and randomly flip one detection example.

    Args:
        in_data: An ``(img, bbox, label)`` tuple; ``img`` is a 3-D array
            whose last two axes are unpacked as height and width.

    Returns:
        tuple: ``(img, bbox, label, scale)`` where ``scale`` is the
        prepared height divided by the original height.
    """
    img, bbox, label = in_data
    _, H, W = img.shape

    # Random brightness and contrast.
    img = random_distort(img)

    img = self.faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

    # Horizontal and vertical flip, mirrored on the boxes.
    img, params = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W),
        x_flip=params['x_flip'], y_flip=params['y_flip'])

    # The original divided by ``t_H``, which is never defined in this
    # method (NameError). The height the image had before ``prepare`` is H.
    scale = o_H / H
    return img, bbox, label, scale
def __call__(self, in_data):
    """Prepare one instance-segmentation example, with a random h-flip.

    Args:
        in_data: Either ``(img, bbox, label, mask)`` or
            ``(img, bbox, label, mask, crowd, area)``; ``img`` is HWC.

    Returns:
        tuple: In evaluation mode, the input fields with ``img``
        transposed to CHW. In training mode,
        ``(img, bbox, label, mask, scale)`` where ``scale`` is the
        prepared height divided by the original height.

    Raises:
        ValueError: If ``in_data`` has neither 4 nor 6 elements.
    """
    n_fields = len(in_data)
    if n_fields not in (4, 6):
        raise ValueError
    img, bbox, label, mask = in_data[:4]

    img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

    if not self.train:
        # Pass every remaining field through unchanged.
        return (img,) + tuple(in_data[1:])

    _, H, W = img.shape
    img = self.mask_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    out_size = (o_H, o_W)
    scale = o_H / H

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (H, W), out_size)
    if len(mask) > 0:
        mask = transforms.resize(mask, size=out_size, interpolation=0)

    # Horizontal flip, mirrored on boxes and masks.
    img, flip = transforms.random_flip(
        img, x_random=True, return_param=True)
    x_flip = flip['x_flip']
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=x_flip)
    if mask.ndim == 2:
        # A 2-D mask gets a leading axis just for the flip call.
        mask = transforms.flip(mask[None, :, :], x_flip=x_flip)[0]
    else:
        mask = transforms.flip(mask, x_flip=x_flip)

    return img, bbox, label, mask, scale
def get_example(self, i):
    """Return the i-th example as a scaled image and its label.

    If loading example ``i`` raises, the exception is printed and example
    0 is returned instead. A non-scalar label whose length is a multiple
    of four is reshaped to one row per group of four; single-channel
    images are tiled to three channels.

    Args:
        i (int): Index of the example.

    Returns:
        tuple: ``(image / 255, label)``, with a third element
        ``numpy.zeros((1,))`` when ``self.return_dummy_scores`` is set.
    """
    try:
        image, label = super().get_example(i)
    except Exception as e:
        # Best-effort fallback: report the problem and use example 0.
        print(e)
        image, label = super().get_example(0)

    label_is_array = len(label.shape) > 0
    if label_is_array and len(label) % 4 == 0:
        label = numpy.reshape(label, (len(label) // 4, -1))

    if image.shape[0] == 1:
        image = numpy.tile(image, (3, 1, 1))

    if self.augmentations is not None:
        # The augmenter is given a uint8 HWC image; convert there and back.
        hwc = numpy.transpose(image, (1, 2, 0)).astype(numpy.uint8)
        hwc = self.augmentations.augment_images([hwc])[0]
        image = numpy.transpose(hwc.astype(numpy.float32), (2, 0, 1))

    if self.image_size is not None:
        current_size = image.shape[-2:]
        if len(label.shape) > 1:
            # we are likely dealing with bboxes
            self.check_for_bad_label(label, current_size)
            label = transforms.resize_bbox(
                label.astype(numpy.float32), current_size, self.image_size)
        image = resize_image(
            image, self.image_size, image_mode=self.image_mode)
        label = label.astype(self._label_dtype)

    if len(image.shape) == 2:
        image = image[None, ...]

    if self.return_dummy_scores:
        return image / 255, label, numpy.zeros((1, ))
    return image / 255, label
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` example and encode it for SSD.

    The pipeline is: colour distortion, optional random expansion,
    constrained random crop, resize to ``(self.size, self.size)``, random
    horizontal flip, mean subtraction, target encoding.

    Args:
        in_data: An ``(img, bbox, label)`` tuple.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``; the last two come from
        ``self.coder.encode``.
    """
    pixels, bbox, label = in_data

    # Colour distortion.
    pixels = random_distort(pixels)

    # Random expansion, taken when the {0, 1} draw is 1.
    if np.random.randint(2):
        pixels, offsets = transforms.random_expand(
            pixels, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=offsets['y_offset'], x_offset=offsets['x_offset'])

    # Random crop; labels follow the boxes that crop_bbox kept.
    pixels, window = random_crop_with_bbox_constraints(
        pixels, bbox, return_param=True)
    bbox, survivors = transforms.crop_bbox(
        bbox,
        y_slice=window['y_slice'], x_slice=window['x_slice'],
        allow_outside_center=False, return_param=True)
    label = label[survivors['index']]

    # Resize image and boxes to the square network input.
    _, h_in, w_in = pixels.shape
    side = (self.size, self.size)
    pixels = resize_with_random_interpolation(pixels, side)
    bbox = transforms.resize_bbox(bbox, (h_in, w_in), side)

    # Random horizontal flip.
    pixels, mirrored = transforms.random_flip(
        pixels, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, side, x_flip=mirrored['x_flip'])

    pixels -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return pixels, mb_loc, mb_label
def predict(self, imgs):
    """Detect objects from images.

    Each image is divided by 255 and fitted into an
    ``(self.insize, self.insize)`` canvas with ``resize_contain`` (fill
    value 0.5). Decoded boxes are mapped back to the original image by
    re-centring, resizing and re-centring again.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images. All
            images are in CHW and RGB format and the range of their
            value is [0, 255].

    Returns:
        tuple of lists: ``(bboxes, labels, scores)``.

        * **bboxes**: A list of float arrays of shape (R, 4), where R is
          the number of bounding boxes in an image. Each bounding box is
          organized by (y_min, x_min, y_max, x_max) in the second axis.
        * **labels**: A list of integer arrays of shape (R,). Values are
          in range [0, L - 1], where L is the number of foreground
          classes.
        * **scores**: A list of float arrays of shape (R,). Each value
          indicates how confident the prediction is.
    """
    batch, params = [], []
    for image in imgs:
        _, height, width = image.shape
        image, param = transforms.resize_contain(
            image / 255, (self.insize, self.insize),
            fill=0.5, return_param=True)
        batch.append(self.xp.array(image))
        # Remember the original size next to the resize parameters.
        param['size'] = (height, width)
        params.append(param)

    with chainer.using_config('train', False), \
            chainer.function.no_backprop_mode():
        locs, objs, confs = self(self.xp.stack(batch))
    locs, objs, confs = locs.array, objs.array, confs.array

    bboxes, labels, scores = [], [], []
    for loc, obj, conf, param in zip(locs, objs, confs, params):
        bbox, label, score = self._decode(loc, obj, conf)
        bbox = cuda.to_cpu(bbox)
        label = cuda.to_cpu(label)
        score = cuda.to_cpu(score)

        orig_size = param['size']
        # Shift so the canvas centre becomes the origin, scale from the
        # contained size to the original size, then shift to the centre
        # of the original image.
        bbox = transforms.translate_bbox(
            bbox, -self.insize / 2, -self.insize / 2)
        bbox = transforms.resize_bbox(
            bbox, param['scaled_size'], orig_size)
        bbox = transforms.translate_bbox(
            bbox, orig_size[0] / 2, orig_size[1] / 2)

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample for detector training.

    The sample is colour-distorted, rotated, optionally expanded, randomly
    cropped, passed through ``self.faster_rcnn.prepare`` and finally flipped
    at random along both axes. When the crop removes every bounding box,
    the untouched input sample is used instead of the augmented one.

    Args:
        in_data (tuple): ``(img, bbox, label)`` for one example.
            ``img.shape`` is unpacked into three values, the last two
            being used as height and width. ``label`` is indexed with the
            index array returned by ``transforms.crop_bbox``.

    Returns:
        tuple: ``(img, bbox, label, scale)`` where ``scale`` is the height
        of the prepared image divided by the image height just before
        ``prepare`` was applied.
    """
    img, bbox, label = in_data
    _, H, W = img.shape

    # Random brightness and contrast.
    img = random_distort(img)

    # Random rotation. ``params['k']`` is the number of 90-degree rotations
    # applied; the boxes are rotated with the pre-rotation size.
    img, params = transforms.random_rotate(img, return_param=True)
    bbox = rotate_bbox(bbox, (H, W), params['k'])

    # Random expansion: place the image on a larger canvas (ratio drawn up
    # to max_ratio) and shift the boxes by the paste offset.
    if np.random.randint(2):
        # Mean over the last two axes, one value per leading-axis entry,
        # reshaped so it broadcasts over the canvas.
        fill_value = img.mean(axis=1).mean(axis=1).reshape(-1, 1, 1)
        img, param = transforms.random_expand(
            img, max_ratio=2, fill=fill_value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

    # Random crop with bounding-box constraints. Boxes whose centres fall
    # outside the crop are dropped, and the labels are filtered to match.
    img, param = random_crop_with_bbox_constraints(
        img, bbox, min_scale=0.5, max_aspect_ratio=1.5, return_param=True)
    bbox, param = transforms.crop_bbox(
        bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
        allow_outside_center=False, return_param=True)
    label = label[param['index']]

    # If every box was removed, fall back to the original sample.
    if bbox.shape[0] == 0:
        img, bbox, label = in_data

    # Measure the size only here, after all geometry changes (or the
    # fallback), because this is the size the boxes currently refer to.
    _, t_H, t_W = img.shape

    # Prepare the image for the network and rescale the boxes to match.
    img = self.faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    bbox = transforms.resize_bbox(bbox, (t_H, t_W), (o_H, o_W))

    # Random horizontal and vertical flip of the image and its boxes.
    img, params = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W),
        x_flip=params['x_flip'], y_flip=params['y_flip'])

    scale = o_H / t_H
    return img, bbox, label, scale
def __call__(self, in_data):
    """Augment one sample and build fixed-size training targets.

    Besides transforming the sample, each call updates ``self.count`` and
    may advance ``self.i`` / ``self.output_shape`` to switch the network
    input resolution (multi-scale training).

    Args:
        in_data (tuple): ``(img, bbox, label)``.

            * img (array): Shape is (3, H, W). Range is [0, 255]. It is
              divided by 255 in place after colour augmentation.
            * bbox (array): Shape is (N, 4), ordered
              (y_min, x_min, y_max, x_max).
            * label (array): Classes of the bounding boxes.

    Returns:
        tuple: ``(img, out_bbox, out_label, gmap, num)``.

        * img (array): Augmented image clipped to [0, 1]. Padding added
          by ``expand`` uses ``self.value``.
        * out_bbox (array): Shape ``(self.max_target, 4)``, dtype ``'f'``.
          Boxes divided by ``self.output_shape``, zero-padded or truncated
          to ``self.max_target`` rows.
        * out_label (array): Shape ``(self.max_target,)``, dtype ``'i'``,
          padded / truncated the same way.
        * gmap: First ``self.max_target`` entries of the result of
          ``create_map_anchor_gt``.
        * num (array): One-element ``'i'`` array holding
          ``min(number of boxes, self.max_target)``.
    """
    # There are five data augmentation steps
    # 1. Color augmentation
    # 2. Random expansion
    # 3. Random cropping
    # 4. Resizing with random interpolation
    # 5. Random horizontal flipping

    # Multi-scale schedule: whenever the call counter is a non-zero multiple
    # of both 10 and the batch size, move to the next entry of self.dim
    # (cyclically) and use it as the square output shape.
    if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
        self.i += 1
        i = self.i % len(self.dim)
        self.output_shape = (self.dim[i], self.dim[i])
    self.count += 1

    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img, brightness_delta=32,
                         contrast_low=0.5, contrast_high=1.5,
                         saturation_low=0.5, saturation_high=1.5,
                         hue_delta=25)

    # Normalize. range is [0, 1]
    img /= 255.0

    _, H, W = img.shape
    # Random overall scale relative to the network input size.
    scale = np.random.uniform(0.25, 2)
    # NOTE(review): random_expand is never read below (its only uses are
    # the commented-out factors on the next lines); the draw still
    # advances NumPy's global random state.
    random_expand = np.random.uniform(0.8, 1.2, 2)
    net_h, net_w = self.output_shape
    out_h = net_h * scale  # random_expand[0]
    out_w = net_w * scale  # random_expand[1]
    # Derive the shorter side from the longer one using the source aspect
    # ratio, jittered by a factor drawn from [0.8, 1.2).
    if H > W:
        out_w = out_h * (float(W) / H) * np.random.uniform(0.8, 1.2)
    elif H < W:
        out_h = out_w * (float(H) / W) * np.random.uniform(0.8, 1.2)

    out_h = int(out_h)
    out_w = int(out_w)

    # 4. Resizing with random interpolation (boxes are rescaled to match).
    img = resize_with_random_interpolation(img, (out_h, out_w))
    bbox = transforms.resize_bbox(bbox, (H, W), (out_h, out_w))

    if out_h < net_h and out_w < net_w:
        # 2. The resized image is smaller than the network input on both
        # axes: expand it to the network size and shift the boxes by the
        # returned offset.
        img, param = expand(img, out_h=net_h, out_w=net_w,
                            fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
    else:
        # At least one side reaches the network size. A side that is still
        # smaller is expanded to the network size; any other side is
        # enlarged by 5%.
        out_h = net_h if net_h > out_h else int(out_h * 1.05)
        out_w = net_w if net_w > out_w else int(out_w * 1.05)
        img, param = expand(img, out_h=out_h, out_w=out_w,
                            fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

        # 3. Crop back to the network size; labels are filtered to the
        # boxes that crop_bbox kept.
        # NOTE(review): the crop is assumed to belong to this branch only,
        # since the other branch already expands to (net_h, net_w) --
        # confirm against the original layout.
        img, param = crop_with_bbox_constraints(
            img, bbox, return_param=True,
            crop_height=net_h, crop_width=net_w)
        bbox, param = transforms.crop_bbox(
            bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
            allow_outside_center=False, return_param=True)
        label = label[param['index']]

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, self.output_shape, x_flip=params['x_flip'])

    # Preparation for Yolov2 network. scale=[0, 1]
    # Columns 0 and 2 are divided by the output height, columns 1 and 3 by
    # the output width, both in place.
    bbox[:, ::2] /= self.output_shape[0]  # y
    bbox[:, 1::2] /= self.output_shape[1]  # x

    # Copy boxes and labels into zero-initialised buffers that hold at
    # least self.max_target rows, then cut them to exactly that many rows.
    num_bbox = len(bbox)
    len_max = max(num_bbox, self.max_target)
    out_bbox = np.zeros((len_max, 4), dtype='f')
    out_bbox[:num_bbox] = bbox[:num_bbox]
    out_label = np.zeros((len_max), dtype='i')
    out_label[:num_bbox] = label
    out_bbox = out_bbox[:self.max_target]
    out_label = out_label[:self.max_target]
    # Number of valid (non-padding) rows in the truncated outputs.
    num_array = min(num_bbox, self.max_target)

    # The map is built from all boxes (before truncation) and then cut to
    # the first self.max_target entries.
    gmap = create_map_anchor_gt(bbox, self.anchors, self.output_shape,
                                self.downscale, self.n_boxes, len_max)
    gmap = gmap[:self.max_target]

    img = np.clip(img, 0, 1)
    return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')