def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it for SSD.

    Six steps run in order: colour distortion, random expansion, random
    cropping, resize with random interpolation, random horizontal flip and
    random vertical flip. Afterwards ``self.mean`` is subtracted from the
    image and the boxes/labels are encoded with ``self.coder``.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)`` where ``mb_loc`` and ``mb_label``
        are whatever ``self.coder.encode`` returns.
    """
    img, bbox, label = in_data
    out_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion (taken when randint(2) draws 1)
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping; labels are filtered with the indices reported
    #    by crop_bbox so they stay aligned with the remaining boxes.
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept['index']]

    # 4. Resizing with random interpolation
    _, crop_h, crop_w = img.shape
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (crop_h, crop_w), out_size)

    # 5. Random horizontal flipping
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, out_size, x_flip=flip_param['x_flip'])

    # 6. Random vertical flipping
    img, flip_param = transforms.random_flip(
        img, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, out_size, y_flip=flip_param['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def test_flip_bbox(self):
    """Check ``flip_bbox`` for an x flip and a y flip with size (32, 34).

    The expected result mirrors columns 0/2 about 31 for the x flip and
    columns 1/3 about 33 for the y flip; all other columns are unchanged.
    """
    bbox = np.random.uniform(low=0., high=32., size=(10, 4))

    # (flag to enable, low column, high column, mirror constant)
    cases = (
        ('x_flip', 0, 2, 31),
        ('y_flip', 1, 3, 33),
    )
    for flag, lo_col, hi_col, mirror in cases:
        out = flip_bbox(bbox, size=(32, 34), **{flag: True})

        bbox_expected = bbox.copy()
        bbox_expected[:, lo_col] = mirror - bbox[:, hi_col]
        bbox_expected[:, hi_col] = mirror - bbox[:, lo_col]
        np.testing.assert_equal(out, bbox_expected)
def test_flip_bbox(self):
    """Check ``flip_bbox`` for a y flip and an x flip with size (32, 24).

    For the y flip, columns 0/2 are mirrored about ``size[0]``; for the
    x flip, columns 1/3 are mirrored about ``size[1]``.
    """
    size = (32, 24)
    bbox = generate_random_bbox(10, size, 0, min(size))

    # (flag to enable, index into ``size`` / low column of the pair)
    for flag, axis in (('y_flip', 0), ('x_flip', 1)):
        lo_col, hi_col = axis, axis + 2
        out = flip_bbox(bbox, size=size, **{flag: True})

        bbox_expected = bbox.copy()
        bbox_expected[:, lo_col] = size[axis] - bbox[:, hi_col]
        bbox_expected[:, hi_col] = size[axis] - bbox[:, lo_col]
        np.testing.assert_equal(out, bbox_expected)
def __call__(self, in_data):
    """Crop, resize, flip and normalise a sample, then build target dicts.

    Only three augmentation steps are implemented here (the numbering 3-5
    is kept from the pipeline this was derived from):

    3. Random cropping (if ``self.random_crop`` and a coin flip succeeds)
    4. Resizing to ``(self.size, self.size)``
    5. Random horizontal flipping (if ``self.flip``)

    Args:
        in_data: Triple ``(img, bbox, label)``. ``bbox`` columns are read
            as ``(ymin, xmin, ymax, xmax)`` below.

    Returns:
        Tuple ``(img, t)`` where ``img`` has ``self.mean`` subtracted and
        is divided by ``self.std``, and ``t`` is a list with one dict per
        box holding ``label``, centre ``x``/``y``, size ``w``/``h`` (all
        divided by the image width/height) and ``one_hot_label``.
    """
    img, bbox, label = in_data

    # 3. Random cropping
    if self.random_crop and np.random.rand() > 0.5:
        next_img, param = random_crop_with_bbox_constraints(
            img, bbox, min_scale=min(self.crop_rate), max_scale=max(self.crop_rate), return_param=True)
        next_bbox, param = transforms.crop_bbox(bbox, y_slice=param['y_slice'], x_slice=param['x_slice'], allow_outside_center=False, return_param=True)
        # Accept the crop only when at least one box survives.
        # NOTE(review): the source was flattened; the image/bbox swap is
        # assumed to belong inside this branch so that image, boxes and
        # labels stay consistent -- confirm against the original file.
        if (len(label[param['index']]) != 0):
            label = label[param['index']]
            img, bbox = next_img, next_bbox

    # 4. Resizing
    _, H, W = img.shape
    img = transforms.resize(img, (self.size, self.size))
    bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

    # 5. Random horizontal flipping
    if self.flip:
        img, params = transforms.random_flip(img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (self.size, self.size), x_flip=params['x_flip'])

    # In-place normalisation of the resized image.
    img -= self.mean
    img /= self.std

    _, height, width = img.shape
    ymin = bbox[:, 0]
    xmin = bbox[:, 1]
    ymax = bbox[:, 2]
    xmax = bbox[:, 3]
    # Row ``k`` of the identity matrix is the one-hot vector for class ``k``.
    one_hot_label = np.eye(self.n_class)[label]

    # Box centre and size relative to the image; note the half-extent uses
    # floor division before the centre is divided by width/height.
    xs = (xmin + (xmax - xmin) // 2) / width
    ws = (xmax - xmin) / width
    ys = (ymin + (ymax - ymin) // 2) / height
    hs = (ymax - ymin) / height
    t = [{
        'label': l, 'x': x, 'w': w, 'y': y, 'h': h, 'one_hot_label': hot
    } for l, x, w, y, h, hot in zip(label, xs, ws, ys, hs, one_hot_label)]
    return img, t
def __call__(self, in_data):
    """Distort, rotate, rescale and flip one sample for Faster R-CNN.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the height
        after ``self.faster_rcnn.prepare`` divided by the height after the
        random rotation.
    """
    img, bbox, label = in_data
    _, src_h, src_w = img.shape

    # Random colour distortion.
    img = random_distort(img)

    # Random rotation; ``k`` in the returned parameters is forwarded to
    # ``rotate_bbox`` together with the pre-rotation size.
    img, rot_param = transforms.random_rotate(img, return_param=True)
    bbox = rotate_bbox(bbox, (src_h, src_w), rot_param['k'])
    _, rot_h, rot_w = img.shape

    # Let the network rescale the image, then rescale the boxes to match.
    img = self.faster_rcnn.prepare(img)
    _, out_h, out_w = img.shape
    bbox = transforms.resize_bbox(bbox, (rot_h, rot_w), (out_h, out_w))

    # Random horizontal and vertical flip, mirrored onto the boxes.
    img, flip_param = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (out_h, out_w),
        x_flip=flip_param['x_flip'], y_flip=flip_param['y_flip'])

    scale = out_h / rot_h
    return img, bbox, label, scale
def transform(in_data):
    """Randomly flip an image horizontally and flip its boxes to match.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label)``; ``label`` is passed through unchanged.
    """
    img, bbox, label = in_data
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    # img.shape[1:] is the size handed to flip_bbox.
    flipped_bbox = transforms.flip_bbox(
        bbox, img.shape[1:], x_flip=flip_param['x_flip'])
    return img, flipped_bbox, label
def __call__(self, in_data):
    """Randomly flip, rescale and mean-subtract a sample.

    Args:
        in_data: Either ``(img, bbox, label)`` or, with a mask,
            ``(img, mask, label, bbox)`` (note the element order).

    Returns:
        ``(img, bbox, label)`` for 3-element input, or
        ``(img, bbox, label, mask)`` for 4-element input. ``bbox`` is
        multiplied by the scale returned from ``scale_img``; the mask is
        flipped, resized with nearest-neighbour interpolation and cast to
        boolean.
    """
    has_mask = len(in_data) == 4
    if has_mask:
        img, mask, label, bbox = in_data
    else:
        img, bbox, label = in_data

    # Flipping
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    x_flip = params['x_flip']
    bbox = transforms.flip_bbox(bbox, img.shape[1:], x_flip=x_flip)

    # Scaling and mean subtraction
    img, scale = scale_img(img, self.min_size, self.max_size)
    img -= self.mean
    bbox = bbox * scale

    if not has_mask:
        return img, bbox, label

    mask = transforms.flip(mask, x_flip=x_flip)
    # The builtin ``bool`` replaces the ``np.bool`` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24.
    mask = transforms.resize(
        mask.astype(np.float32), img.shape[1:],
        interpolation=PIL.Image.NEAREST).astype(bool)
    return img, bbox, label, mask
def __call__(self, in_data):
    """Apply a random horizontal flip to the image and its boxes.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label)``; ``label`` is returned unchanged.
    """
    img, bbox, label = in_data

    # Flip the image, then mirror the boxes with the same decision.
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, img.shape[1:], x_flip=flip_param['x_flip'])
    return img, bbox, label
def __call__(self, in_data):
    """Prepare one sample for Mask R-CNN with random x/y flipping.

    Args:
        in_data: ``(img, bbox, label, mask)`` or
            ``(img, bbox, label, mask, crowd, area)``. ``img`` is
            transposed from (H, W, C) to (C, H, W) here.

    Returns:
        When ``self.train`` is false: the input fields with only the image
        transposed (4 or 6 elements, matching the input).
        When training: ``(img, bbox, label, mask, scale, sizes)`` where
        ``scale`` and ``sizes`` come from ``self.mask_rcnn.prepare``.

    Raises:
        ValueError: If ``in_data`` has neither 4 nor 6 elements.
    """
    n_fields = len(in_data)
    if n_fields == 6:
        img, bbox, label, mask, crowd, area = in_data
    elif n_fields == 4:
        img, bbox, label, mask = in_data
    else:
        raise ValueError(
            'in_data must have 4 or 6 elements, got {}'.format(n_fields))

    img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

    if not self.train:
        # The length was validated above, so only two cases remain.
        if n_fields == 6:
            return img, bbox, label, mask, crowd, area
        return img, bbox, label, mask

    # ``prepare`` works on a batch; wrap the single image and unwrap the
    # per-image results again.
    imgs, sizes, scales = self.mask_rcnn.prepare([img])
    img = imgs[0]
    H, W = sizes[0]
    scale = scales[0]
    _, o_H, o_W = img.shape

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
    if len(mask) > 0:
        mask = transforms.resize(
            mask, size=(o_H, o_W), interpolation=0)

    # Horizontal and vertical flip, applied consistently to the image,
    # the boxes and the mask.
    img, params = transforms.random_flip(
        img, y_random=True, x_random=True, return_param=True)
    y_flip, x_flip = params['y_flip'], params['x_flip']
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W), y_flip=y_flip, x_flip=x_flip)
    if mask.ndim == 2:
        # Add a leading axis for the flip and strip it again afterwards.
        mask = transforms.flip(
            mask[None, :, :], y_flip=y_flip, x_flip=x_flip)[0]
    else:
        mask = transforms.flip(mask, y_flip=y_flip, x_flip=x_flip)

    return img, bbox, label, mask, scale, sizes
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample in five steps for SSD.

    1. Color augmentation
    2. Random expansion
    3. Random cropping
    4. Resizing with random interpolation
    5. Random horizontal flipping

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)`` as produced by
        ``self.coder.encode`` after subtracting ``self.mean`` from the image.
    """
    img, bbox, label = in_data
    target_size = (self.size, self.size)

    # 1. Color augmentation: combines brightness, contrast, saturation and
    #    hue changes.
    img = random_distort(img)

    # 2. Random expansion: place the input image at a random position on a
    #    canvas to produce images at various ratios, then shift the
    #    bounding boxes accordingly.
    if np.random.randint(2):
        img, expand_info = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_info['y_offset'],
            x_offset=expand_info['x_offset'])

    # 3. Random cropping, then adjust the bounding boxes so they fit
    #    inside the cropped image.
    img, crop_info = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, survivors = transforms.crop_bbox(
        bbox,
        y_slice=crop_info['y_slice'],
        x_slice=crop_info['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[survivors['index']]

    # 4. Resize the image (random interpolation) and the bounding boxes.
    _, cropped_h, cropped_w = img.shape
    img = resize_with_random_interpolation(img, target_size)
    bbox = transforms.resize_bbox(
        bbox, (cropped_h, cropped_w), target_size)

    # 5. Randomly flip the image and the bounding boxes horizontally.
    img, flip_info = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, target_size, x_flip=flip_info['x_flip'])

    # Preparation for feeding the SSD network.
    img -= self.mean
    # Produce mb_loc (offsets and scales relative to the default bounding
    # boxes) and mb_label (array of classes).
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Preprocess an image, rescale its boxes and flip both at random.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, [scale, scale])`` where ``scale`` is the
        preprocessed height divided by the original height.
    """
    img, bbox, label = in_data
    _, in_h, in_w = img.shape

    img = preprocess(img, self.min_size, self.max_size)
    _, out_h, out_w = img.shape
    bbox = resize_bbox(bbox, (in_h, in_w), (out_h, out_w))
    scale = out_h / in_h

    # Horizontal flip, mirrored onto the boxes.
    img, flip_param = random_flip(img, x_random=True, return_param=True)
    bbox = flip_bbox(bbox, (out_h, out_w), x_flip=flip_param['x_flip'])

    return img, bbox, label, [scale, scale]
def __call__(self, in_data):
    """Rescale a sample with ``self.faster_rcnn.prepare`` and flip it.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the prepared
        height divided by the original height.
    """
    img, bbox, label = in_data
    _, raw_h, raw_w = img.shape

    img = self.faster_rcnn.prepare(img)
    _, new_h, new_w = img.shape
    new_size = (new_h, new_w)
    bbox = transforms.resize_bbox(bbox, (raw_h, raw_w), new_size)

    # Horizontal flip of the image and, consistently, of the boxes.
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, new_size, x_flip=flip_param["x_flip"])

    scale = new_h / raw_h
    return img, bbox, label, scale
def transform(in_data):
    """Rescale a sample with ``faster_rcnn.prepare`` and flip it at random.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the prepared
        height divided by the original height.
    """
    img, bbox, label = in_data
    _, H, W = img.shape
    img = faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    scale = o_H / H

    # NOTE(review): sizes are passed as (W, H) here, which matches an older
    # bbox API; newer ChainerCV expects (H, W) -- confirm against the
    # installed version.
    bbox = transforms.resize_bbox(bbox, (W, H), (o_W, o_H))

    # Horizontal flip. The flag is passed by keyword because the positional
    # order of ``x_flip``/``y_flip`` differs between library versions; a
    # positional argument could silently be taken as ``y_flip``.
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_W, o_H), x_flip=params['x_flip'])

    return img, bbox, label, scale
def __call__(self, in_data):
    """Augment one sample for SSD, with a shortcut for box-less samples.

    With at least one box the five steps are: colour distortion, random
    expansion, random cropping, resize with random interpolation and
    random horizontal flip. With no boxes a ``RuntimeWarning`` is issued
    and the image is only resized before encoding.

    Args:
        in_data: Triple ``(img, bbox, label)``; ``bbox`` is converted to a
            float32 array first.

    Returns:
        Tuple ``(img, mb_loc, mb_label)`` from ``self.coder.encode``.
    """
    img, bbox, label = in_data
    bbox = np.array(bbox).astype(np.float32)

    if len(bbox) == 0:
        # Nothing to augment against: resize only and encode as-is.
        warnings.warn("No bounding box detected", RuntimeWarning)
        img = resize_with_random_interpolation(
            img, (self.size, self.size))
        mb_loc, mb_label = self.coder.encode(bbox, label)
        return img, mb_loc, mb_label

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, grow = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=grow['y_offset'], x_offset=grow['x_offset'])

    # 3. Random cropping
    img, window = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, remaining = transforms.crop_bbox(
        bbox,
        y_slice=window['y_slice'],
        x_slice=window['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[remaining['index']]

    # 4. Resizing with random interpolation
    _, win_h, win_w = img.shape
    img = resize_with_random_interpolation(img, (self.size, self.size))
    bbox = transforms.resize_bbox(
        bbox, (win_h, win_w), (self.size, self.size))

    # 5. Random horizontal flipping
    img, mirror = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (self.size, self.size), x_flip=mirror['x_flip'])

    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Prepare a sample for Faster R-CNN and apply a random x flip.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)``; ``scale`` is the ratio of the
        prepared image height to the input image height.
    """
    img, bbox, label = in_data
    _, before_h, before_w = img.shape

    img = self.faster_rcnn.prepare(img)
    _, after_h, after_w = img.shape
    scale = after_h / before_h

    # Bring the boxes to the prepared image size.
    bbox = transforms.resize_bbox(
        bbox, (before_h, before_w), (after_h, after_w))

    # Horizontal flip.
    img, flip_info = transforms.random_flip(
        img, x_random=True, return_param=True)
    do_flip = flip_info['x_flip']
    bbox = transforms.flip_bbox(bbox, (after_h, after_w), x_flip=do_flip)

    return img, bbox, label, scale
def __call__(self, in_data):
    """Prepare an ``(img, mask, label)`` sample for FCIS with a random flip.

    Boxes are derived from the masks with ``mask_to_bbox`` before any
    resizing takes place.

    Args:
        in_data: Triple ``(img, mask, label)``.

    Returns:
        Tuple ``(img, mask, label, bbox, scale)`` where ``scale`` is the
        prepared height divided by the original height.
    """
    img, mask, label = in_data
    bbox = mask_to_bbox(mask)

    _, src_h, src_w = img.shape
    img = self.fcis.prepare(img)
    _, dst_h, dst_w = img.shape
    dst_size = (dst_h, dst_w)
    scale = dst_h / src_h

    # Resize mask (as float32) and boxes to the prepared image size.
    mask = transforms.resize(mask.astype(np.float32), dst_size)
    bbox = transforms.resize_bbox(bbox, (src_h, src_w), dst_size)

    # One flip decision shared by image, mask and boxes.
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    x_flip = flip_param['x_flip']
    mask = transforms.flip(mask, x_flip=x_flip)
    bbox = transforms.flip_bbox(bbox, dst_size, x_flip=x_flip)

    return img, mask, label, bbox, scale
def transform(in_data):
    """Mean-subtract, resize and randomly flip an ``(img, bbox)`` pair.

    The per-channel constants are subtracted from ``img`` in place. The
    output shape comes from ``_shape_soft_min_hard_max(shape, 600, 1200)``.

    Args:
        in_data: Pair ``(img, bbox)``.

    Returns:
        Pair ``(img, bbox)`` after resizing and the random horizontal flip.
    """
    img, bbox = in_data

    channel_mean = np.array([103.939, 116.779, 123.68])
    img -= channel_mean[:, None, None]

    # Resize image and bounding boxes to the shape picked by the helper.
    src_shape = img.shape[1:]
    dst_shape = _shape_soft_min_hard_max(src_shape, 600, 1200)
    img = transforms.resize(img, dst_shape)
    bbox = transforms.resize_bbox(bbox, src_shape, dst_shape)

    # Horizontal flip; the 'h' entry of the returned dict is the decision.
    img, flips = transforms.random_flip(
        img, horizontal_flip=True, return_flip=True)
    bbox = transforms.flip_bbox(bbox, dst_shape, flips['h'])
    return img, bbox
def __call__(self, in_data):
    """Run the five-step SSD training augmentation on one sample.

    Steps: (1) colour augmentation, (2) random expansion, (3) random
    cropping, (4) resizing with random interpolation, (5) random
    horizontal flipping.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``; the image has ``self.mean``
        subtracted and the targets come from ``self.coder.encode``.
    """
    img, bbox, label = in_data
    side = self.size

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, offsets = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=offsets['y_offset'],
            x_offset=offsets['x_offset'])

    # 3. Random cropping
    img, slices = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=slices['y_slice'],
        x_slice=slices['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept['index']]

    # 4. Resizing with random interpolation
    _, prev_h, prev_w = img.shape
    img = resize_with_random_interpolation(img, (side, side))
    bbox = transforms.resize_bbox(bbox, (prev_h, prev_w), (side, side))

    # 5. Random horizontal flipping
    img, flipped = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (side, side), x_flip=flipped['x_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Distort, rescale and randomly flip one sample for Faster R-CNN.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the height
        after ``self.faster_rcnn.prepare`` divided by the input height.
    """
    img, bbox, label = in_data
    _, H, W = img.shape

    # Random colour distortion.
    img = random_distort(img)

    img = self.faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))

    # Horizontal and vertical flip, mirrored onto the boxes.
    img, params = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W),
        x_flip=params['x_flip'], y_flip=params['y_flip'])

    # The original divided by ``t_H``, which is never defined in this
    # function; ``H`` is the height the boxes were resized from.
    scale = o_H / H
    return img, bbox, label, scale
def __call__(self, in_data):
    """Prepare a sample (optionally with a mask) and flip it at random.

    Args:
        in_data: ``(img, label, bbox, mask, i)`` or ``(img, bbox, label, i)``
            (note the different label/bbox order between the two forms).

    Returns:
        ``(img, [], [], 1)`` when there are no boxes; otherwise
        ``(img, bbox, label, scale, mask)``. ``mask`` is ``None`` when the
        input carried no mask.

    Raises:
        ValueError: If ``in_data`` has neither 4 nor 5 elements.
    """
    # Input without a mask previously hit an unbound ``mask`` below.
    mask = None
    if len(in_data) == 5:
        img, label, bbox, mask, i = in_data
    elif len(in_data) == 4:
        img, bbox, label, i = in_data
    else:
        raise ValueError(
            'in_data must have 4 or 5 elements, got {}'.format(len(in_data)))

    _, H, W = img.shape
    img = self.net.prepare(img)
    _, o_H, o_W = img.shape
    scale = o_H / H

    if len(bbox) == 0:
        return img, [], [], 1

    bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
    if mask is not None:
        mask = resize(mask, (o_H, o_W))

    # Horizontal flip shared by image, boxes and mask.
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, (o_H, o_W), x_flip=params['x_flip'])
    if mask is not None:
        mask = transforms.flip(mask, x_flip=params['x_flip'])
        # NOTE(review): this writes a file on every call and looks like
        # leftover debugging; kept to preserve behaviour -- consider
        # removing.
        cv2.imwrite("gt_roi.png", mask[0] * 255)

    return img, bbox, label, scale, mask
def __call__(self, in_data):
    """Prepare one sample for Mask R-CNN with a random horizontal flip.

    Args:
        in_data: ``(img, bbox, label, mask)`` or
            ``(img, bbox, label, mask, crowd, area)``; ``img`` is
            transposed from (H, W, C) to (C, H, W).

    Returns:
        When ``self.train`` is false, the input fields with only the image
        transposed. When training, ``(img, bbox, label, mask, scale)`` with
        ``scale`` the prepared height divided by the original height.

    Raises:
        ValueError: If ``in_data`` has neither 4 nor 6 elements.
    """
    n_fields = len(in_data)
    if n_fields == 6:
        img, bbox, label, mask, crowd, area = in_data
    elif n_fields == 4:
        img, bbox, label, mask = in_data
    else:
        raise ValueError

    img = img.transpose(2, 0, 1)  # H, W, C -> C, H, W

    if not self.train:
        # Evaluation: hand the fields back without augmentation.
        if n_fields == 6:
            return img, bbox, label, mask, crowd, area
        return img, bbox, label, mask

    _, in_h, in_w = img.shape
    img = self.mask_rcnn.prepare(img)
    _, out_h, out_w = img.shape
    scale = out_h / in_h

    if len(bbox) > 0:
        bbox = transforms.resize_bbox(bbox, (in_h, in_w), (out_h, out_w))
    if len(mask) > 0:
        mask = transforms.resize(
            mask, size=(out_h, out_w), interpolation=0)

    # Horizontal flip of image, boxes and mask with one shared decision.
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    x_flip = flip_param['x_flip']
    bbox = transforms.flip_bbox(bbox, (out_h, out_w), x_flip=x_flip)
    if mask.ndim == 2:
        # Add a leading axis for the flip and drop it again afterwards.
        mask = transforms.flip(mask[None, :, :], x_flip=x_flip)[0]
    else:
        mask = transforms.flip(mask, x_flip=x_flip)

    return img, bbox, label, mask, scale
def __call__(self, in_data):
    """Map labels to ids, rescale the sample and flip it when training.

    Args:
        in_data: ``(img, label, bbox, mask, i)`` or ``(img, bbox, label, i)``
            (note the different label/bbox order between the two forms).

    Returns:
        ``(img, [], [], 1)`` when there are no boxes; otherwise
        ``(img, bbox, label, scale, mask, i)``. ``mask`` is ``None`` when
        the input carried no mask. Labels are replaced by their 1-based
        position in ``self.labelids``.

    Raises:
        ValueError: If ``in_data`` has neither 4 nor 5 elements.
    """
    # Input without a mask previously hit an unbound ``mask`` below.
    mask = None
    if len(in_data) == 5:
        img, label, bbox, mask, i = in_data
    elif len(in_data) == 4:
        img, bbox, label, i = in_data
    else:
        raise ValueError(
            'in_data must have 4 or 5 elements, got {}'.format(len(in_data)))

    label = [self.labelids.index(l) + 1 for l in label]
    _, H, W = img.shape

    # NOTE(review): the source was flattened; only ``prepare`` is assumed
    # to be guarded by the train flag so that the size and scale below are
    # defined in evaluation mode as well -- confirm against the original.
    if chainer.config.train:
        img = self.net.prepare(img)
    _, o_H, o_W = img.shape
    scale = o_H / H

    if len(bbox) == 0:
        return img, [], [], 1

    bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
    if mask is not None:
        mask = resize(mask, (o_H, o_W))

    if chainer.config.train:
        # Horizontal flip shared by image, boxes and mask.
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, (o_H, o_W), x_flip=params['x_flip'])
        if mask is not None:
            mask = transforms.flip(mask, x_flip=params['x_flip'])

    return img, bbox, label, scale, mask, i
def __call__(self, in_data):
    """Apply the SSD training augmentation and encode the targets.

    The sample is colour-distorted, optionally expanded, randomly cropped,
    resized to ``(self.size, self.size)`` with random interpolation and
    randomly flipped horizontally; ``self.mean`` is then subtracted.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)`` from ``self.coder.encode``.
    """
    img, bbox, label = in_data
    img = random_distort(img)

    # Expansion is taken when randint(2) draws 1.
    do_expand = np.random.randint(2)
    if do_expand:
        img, shift = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=shift['y_offset'], x_offset=shift['x_offset'])

    # Crop, then keep only labels of boxes that survive the crop.
    img, region = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, selection = transforms.crop_bbox(
        bbox,
        y_slice=region['y_slice'],
        x_slice=region['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[selection['index']]

    # Resize image and boxes to the network input size.
    final_size = (self.size, self.size)
    _, region_h, region_w = img.shape
    img = resize_with_random_interpolation(img, final_size)
    bbox = transforms.resize_bbox(bbox, (region_h, region_w), final_size)

    # Horizontal flip.
    img, flip_state = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, final_size, x_flip=flip_state['x_flip'])

    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment an ``(img, bbox, label, mask)`` sample for SSD training.

    Steps: (1) colour augmentation, (2) random expansion, (3) random
    cropping, (4) resizing with random interpolation, (5) random horizontal
    flipping. The mask, when given, follows the same geometry through the
    private mask helpers of this class.

    Args:
        in_data: Tuple ``(img, bbox, label, mask)``; ``mask`` may be
            ``None``.

    Returns:
        Tuple ``(img, mb_loc, mb_label, mask)``. A missing mask is replaced
        by a ``(self.size, self.size)`` int32 array filled with -1, and any
        mask is cast to int32.

    Raises:
        RuntimeError: If the mask is empty before resizing, or if ``img``
            is ``None`` at the end.
    """
    img, bbox, label, mask = in_data
    net_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
        if mask is not None:
            # The mask helper reads the expanded canvas size from ``param``.
            _, canvas_h, canvas_w = img.shape
            param['new_height'] = canvas_h
            param['new_width'] = canvas_w
            mask = self._random_expand_mask(mask, param)

    # 3. Random cropping (the mask helper is called unconditionally)
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    y_slice, x_slice = crop_param['y_slice'], crop_param['x_slice']
    mask = self._fixed_crop_mask(mask, y_slice, x_slice)
    bbox, kept = transforms.crop_bbox(
        bbox, y_slice=y_slice, x_slice=x_slice,
        allow_outside_center=False, return_param=True)
    label = label[kept['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    img = resize_with_random_interpolation(img, net_size)
    if mask is not None:
        if mask.size == 0:
            raise RuntimeError
        mask = self._resize_with_nearest(mask, (self.size, self.size))
    bbox = transforms.resize_bbox(bbox, (H, W), net_size)

    # 5. Random horizontal flipping
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, net_size, x_flip=flip_param['x_flip'])
    if mask is not None:
        mask = self._random_flip_mask(
            mask,
            x_flip=flip_param['x_flip'],
            y_flip=flip_param['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    if mask is None:
        mask = np.ones([self.size, self.size], dtype=np.int32) * -1
    if str(mask.dtype) != 'int32':
        mask = mask.astype(np.int32)

    if img is None:
        raise RuntimeError
    return img, mb_loc, mb_label, mask
def __call__(self, in_data):
    """Augment one sample and build fixed-size YOLOv2 training targets.

    Args:
        in_data: Triple ``(img, bbox, label)``.
            img (array): Shape is (3, H, W). Range is [0, 255].
            bbox (array): Shape is (N, 4), (y_min, x_min, y_max, x_max).
            label (array): Classes of the bounding boxes.

    Returns:
        Tuple ``(img, out_bbox, out_label, gmap, num)``:
            img (array): Image clipped to [0, 1]; padding introduced by
                ``expand`` is filled with ``self.value``.
            out_bbox (array): float32, at most ``self.max_target`` rows,
                coordinates divided by the network input size.
            out_label (array): int32 labels, zero-padded like ``out_bbox``.
            gmap: Result of ``create_map_anchor_gt`` truncated to
                ``self.max_target`` entries.
            num (array): One-element int32 array holding
                ``min(N, self.max_target)``.
    """
    # Steps performed below: colour augmentation, random rescale, expansion
    # to at least the network size, constrained crop to the network size
    # and a random horizontal flip.

    # Switch to the next entry of ``self.dim`` as network input size when
    # the call counter is a non-zero multiple of both 10 and the batch size.
    if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
        self.i += 1
        i = self.i % len(self.dim)
        self.output_shape = (self.dim[i], self.dim[i])
    self.count += 1

    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img, brightness_delta=32,
                         contrast_low=0.5, contrast_high=1.5,
                         saturation_low=0.5, saturation_high=1.5,
                         hue_delta=25)

    # Normalize in place. Range is [0, 1].
    img /= 255.0

    _, H, W = img.shape
    scale = np.random.uniform(0.25, 2)
    # NOTE(review): this value is never used, but drawing it still advances
    # the NumPy RNG, so removing it would change the random stream.
    random_expand = np.random.uniform(0.8, 1.2, 2)
    net_h, net_w = self.output_shape
    out_h = net_h * scale  # random_expand[0]
    out_w = net_w * scale  # random_expand[1]
    # Follow the source aspect ratio on the shorter side, jittered by a
    # factor drawn from [0.8, 1.2).
    if H > W:
        out_w = out_h * (float(W) / H) * np.random.uniform(0.8, 1.2)
    elif H < W:
        out_h = out_w * (float(H) / W) * np.random.uniform(0.8, 1.2)
    out_h = int(out_h)
    out_w = int(out_w)

    img = resize_with_random_interpolation(img, (out_h, out_w))
    bbox = transforms.resize_bbox(bbox, (H, W), (out_h, out_w))

    if out_h < net_h and out_w < net_w:
        # Smaller than the network input on both axes: pad up to it.
        img, param = expand(img, out_h=net_h, out_w=net_w,
                            fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
    else:
        # Pad a too-small axis up to the network size and grow any other
        # axis by 5% so that a crop of the network size can be taken.
        out_h = net_h if net_h > out_h else int(out_h * 1.05)
        out_w = net_w if net_w > out_w else int(out_w * 1.05)
        img, param = expand(img, out_h=out_h, out_w=out_w,
                            fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])

    # Crop to the network input size; labels follow the surviving boxes.
    img, param = crop_with_bbox_constraints(img, bbox, return_param=True,
                                            crop_height=net_h,
                                            crop_width=net_w)
    bbox, param = transforms.crop_bbox(
        bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
        allow_outside_center=False, return_param=True)
    label = label[param['index']]

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, self.output_shape, x_flip=params['x_flip'])

    # Preparation for the YOLOv2 network: divide coordinates by the
    # network input size.
    bbox[:, ::2] /= self.output_shape[0]  # y
    bbox[:, 1::2] /= self.output_shape[1]  # x

    # Zero-pad boxes and labels, then truncate to ``self.max_target`` rows.
    num_bbox = len(bbox)
    len_max = max(num_bbox, self.max_target)
    out_bbox = np.zeros((len_max, 4), dtype='f')
    out_bbox[:num_bbox] = bbox[:num_bbox]
    out_label = np.zeros((len_max), dtype='i')
    out_label[:num_bbox] = label
    out_bbox = out_bbox[:self.max_target]
    out_label = out_label[:self.max_target]
    num_array = min(num_bbox, self.max_target)

    gmap = create_map_anchor_gt(bbox, self.anchors, self.output_shape,
                                self.downscale, self.n_boxes, len_max)
    gmap = gmap[:self.max_target]

    img = np.clip(img, 0, 1)
    return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')
def __call__(self, in_data):
    """Apply the augmentations enabled in ``self.options`` to one sample.

    Depending on the option values this performs, in order: horizontal
    flip, 90/270 degree rotation, zoom in/out at constant output size,
    random rescale, random crop, window-width change and a contrast
    offset; the image is finally clipped to [0, 255].

    Args:
        in_data: Tuple ``(image, label, bbox, mask)``.

    Returns:
        Tuple ``(image, label, bbox, mask)``; ``label`` is passed through
        unchanged.
    """
    image, label, bbox, mask = in_data
    _, H, W = image.shape
    cell_size = 32 * self.downscale

    # Horizontal flip
    if self.options['x_flip']:
        if np.random.randint(2) == 0:
            image = transforms.flip(image, x_flip=True)
            bbox = transforms.flip_bbox(bbox, (H, W), x_flip=True)
            mask = transforms.flip(mask, x_flip=True)

    # Random rotation (90 or 270 degrees)
    if self.options['rotate90']:
        assert H == W, 'Height and width must match when `rotate90` is set.'
        if np.random.randint(2) == 0:  # Rotate?
            if np.random.randint(2) == 0:  # Counter-clockwise?
                bbox = rotate_bbox(bbox, 1, (H, W))
                image = np.rot90(image, 1, axes=(1, 2))
                mask = np.rot90(mask, 1, axes=(1, 2))
            else:
                bbox = rotate_bbox(bbox, 3, (H, W))
                image = np.rot90(image, 3, axes=(1, 2))
                mask = np.rot90(mask, 3, axes=(1, 2))
        _, H, W = image.shape

    # Zoom in / zoom out
    if self.options['zoom'] > 1:
        assert self.options[
            'scale'] <= 1.0, "`scale` shouldn't be set if `zoom` is set."
        # Draw the log of the zoom factor uniformly from
        # [-log(zoom), log(zoom)).
        max_log_zoom = np.log(self.options['zoom'])
        log_zoom = np.random.random() * 2 * max_log_zoom - max_log_zoom
        zoom = np.exp(log_zoom)
        if zoom > 1:
            # Zoom in: crop a random window and resize it back to (H, W).
            y_size, x_size = int(H / zoom), int(W / zoom)
            y_offset = np.random.randint(H - y_size + 1)
            x_offset = np.random.randint(W - x_size + 1)
            y_slice = slice(y_offset, y_offset + y_size)
            x_slice = slice(x_offset, x_offset + x_size)
            bbox = transforms.crop_bbox(bbox, y_slice, x_slice)
            bbox *= zoom
            image = transforms.resize(image[:, y_slice, x_slice], (H, W))
            mask = transforms.resize(mask[:, y_slice, x_slice], (H, W),
                                     interpolation=Image.NEAREST)
        elif zoom < 1:
            # Zoom out: paste onto a larger zero canvas and resize back to
            # (H, W). The canvases are allocated with one channel.
            y_size, x_size = int(H / zoom), int(W / zoom)
            y_offset = np.random.randint(y_size - H + 1)
            x_offset = np.random.randint(x_size - W + 1)
            bbox = transforms.translate_bbox(bbox, y_offset, x_offset)
            new_image = np.zeros((1, y_size, x_size), dtype=np.float32)
            new_image[:, y_offset:y_offset + H, x_offset:x_offset + W] = image
            new_mask = np.zeros((1, y_size, x_size), dtype=np.float32)
            new_mask[:, y_offset:y_offset + H, x_offset:x_offset + W] = mask
            bbox *= zoom
            image = transforms.resize(new_image, (H, W))
            mask = transforms.resize(new_mask, (H, W),
                                     interpolation=Image.NEAREST)

    # Random scale
    if self.options['scale'] > 1.0:
        assert self.options[
            'crop'], '`crop` must be set if `scale` is set.'
        # Same log-uniform draw as for the zoom factor.
        max_log_scale = np.log(self.options['scale'])
        log_scale = np.random.random() * 2 * max_log_scale - max_log_scale
        scale = np.exp(log_scale)
        image = transforms.resize(image, (int(H * scale), int(W * scale)))
        mask = transforms.resize(mask, (int(H * scale), int(W * scale)),
                                 interpolation=Image.NEAREST)
        _, H, W = image.shape
        bbox *= scale

    # Random crop
    if self.options['crop']:
        # The margin is in [1, cell_size], so the cropped size is a
        # multiple of ``cell_size`` strictly smaller than the input.
        y_margin = (H - 1) % cell_size + 1
        x_margin = (W - 1) % cell_size + 1
        y_offset = np.random.randint(y_margin)
        x_offset = np.random.randint(x_margin)
        y_size = H - y_margin
        x_size = W - x_margin
        y_slice = slice(y_offset, y_offset + y_size)
        x_slice = slice(x_offset, x_offset + x_size)
        image = image[:, y_slice, x_slice]
        bbox = transforms.crop_bbox(bbox, y_slice, x_slice)
        mask = mask[:, y_slice, x_slice]

    # Change window width: stretch intensities around the value 128.
    if self.options['window_width'] > 1.0:
        image = (image - 128) * self.options['window_width'] + 128

    # Change contrast: add one random integer offset from
    # [-contrast, contrast] to the whole image.
    if self.options['contrast']:
        image += np.random.randint(self.options['contrast'] * 2 + 1) - self.options['contrast']

    # NOTE(review): the source was flattened; the clip is assumed to run
    # unconditionally rather than only inside the contrast branch --
    # confirm against the original file.
    image = np.clip(image, 0, 255)

    return image, label, bbox, mask
def __call__(self, in_data):
    """Distort, rotate, expand, crop, rescale and flip one sample.

    If the random crop removes every bounding box, the untouched input
    sample is used instead for the rescale and flip stages.

    Args:
        in_data: Triple ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the height
        after ``self.faster_rcnn.prepare`` divided by the height of the
        image that was handed to it.
    """
    img, bbox, label = in_data
    _, src_h, src_w = img.shape

    # Random colour distortion.
    img = random_distort(img)

    # Random rotation; ``k`` from the returned parameters is forwarded to
    # ``rotate_bbox`` together with the pre-rotation size.
    img, rot_param = transforms.random_rotate(img, return_param=True)
    _, t_H, t_W = img.shape
    bbox = rotate_bbox(bbox, (src_h, src_w), rot_param['k'])

    # Random expansion onto a larger canvas (``max_ratio=2``) filled with
    # the per-channel mean of the image; taken when randint(2) draws 1.
    if np.random.randint(2):
        channel_mean = img.mean(axis=1).mean(axis=1).reshape(-1, 1, 1)
        img, expand_param = transforms.random_expand(
            img, max_ratio=2, fill=channel_mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # Random crop under bounding-box constraints; boxes are cropped with
    # ``allow_outside_center=False`` and labels follow the returned indices.
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, min_scale=0.5, max_aspect_ratio=1.5, return_param=True)
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept['index']]

    # Fall back to the original sample when no box survived.
    if bbox.shape[0] == 0:
        img, bbox, label = in_data

    # Size of whatever image goes into the network preparation.
    _, t_H, t_W = img.shape

    img = self.faster_rcnn.prepare(img)
    _, o_H, o_W = img.shape
    bbox = transforms.resize_bbox(bbox, (t_H, t_W), (o_H, o_W))

    # Random horizontal and vertical flip, mirrored onto the boxes.
    img, flip_param = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (o_H, o_W),
        x_flip=flip_param['x_flip'], y_flip=flip_param['y_flip'])

    scale = o_H / t_H
    return img, bbox, label, scale