def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it with ``self.coder``.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``. ``img`` must be
            three-dimensional; its last two axes are used as height and
            width when rescaling the boxes.

    Returns:
        Tuple ``(img, mb_loc, mb_lab)``: the augmented image with
        ``self.mean`` subtracted, followed by the two values returned by
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data
    target_size = (self.size, self.size)

    # 1. Color augmentation
    image = random_distort(image)

    # 2. Random expansion, taken when the drawn integer is non-zero. The
    #    boxes are shifted by the offsets the expansion reports.
    if np.random.randint(2):
        image, expand_info = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand_info["y_offset"],
            x_offset=expand_info["x_offset"])

    # 3. Random cropping; the same slices are then applied to the boxes.
    image, crop_info = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop_info["y_slice"],
        x_slice=crop_info["x_slice"],
        allow_outside_center=False,
        return_param=True)
    # Keep only the labels of the boxes that crop_bbox retained.
    labels = labels[kept["index"]]

    # 4. Resizing with random interpolation; boxes are rescaled from the
    #    pre-resize size to the target size.
    _, src_h, src_w = image.shape
    boxes = transforms.resize_bbox(boxes, (src_h, src_w), target_size)
    image = resize_with_random_interpolation(image, target_size)

    # 5. Transformation for SSD network input
    image -= self.mean
    mb_loc, mb_lab = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_lab
def __call__(self, in_data):
    """Crop, resize, flip and normalise a sample, then build per-box targets.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``. ``bbox`` is
            indexed as a 2-D array whose columns 0..3 are read as
            ``ymin, xmin, ymax, xmax``.

    Returns:
        Tuple ``(img, t)``. ``img`` is the transformed image after
        ``self.mean`` is subtracted and the result divided by ``self.std``.
        ``t`` is a list with one dict per box holding the keys ``label``,
        ``x``, ``w``, ``y``, ``h`` and ``one_hot_label``; the coordinates are
        divided by the final image width/height.
    """
    img, bbox, label = in_data
    out_size = (self.size, self.size)

    # Random cropping: only attempted when enabled and the draw exceeds 0.5.
    if self.random_crop and np.random.rand() > 0.5:
        cropped_img, crop_param = random_crop_with_bbox_constraints(
            img, bbox,
            min_scale=min(self.crop_rate),
            max_scale=max(self.crop_rate),
            return_param=True)
        cropped_bbox, keep = transforms.crop_bbox(
            bbox,
            y_slice=crop_param['y_slice'],
            x_slice=crop_param['x_slice'],
            allow_outside_center=False,
            return_param=True)
        surviving = label[keep['index']]
        # The crop is accepted only if at least one label survives;
        # otherwise the uncropped image, boxes and labels are kept.
        if len(surviving) != 0:
            label = surviving
            img, bbox = cropped_img, cropped_bbox

    # Resizing to the square output size.
    _, src_h, src_w = img.shape
    img = transforms.resize(img, out_size)
    bbox = transforms.resize_bbox(bbox, (src_h, src_w), out_size)

    # Random horizontal flipping (only when self.flip is set).
    if self.flip:
        img, flip_param = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, out_size, x_flip=flip_param['x_flip'])

    # In-place normalisation.
    img -= self.mean
    img /= self.std

    _, height, width = img.shape
    ymin, xmin, ymax, xmax = bbox[:, 0], bbox[:, 1], bbox[:, 2], bbox[:, 3]
    one_hot_label = np.eye(self.n_class)[label]

    # Box centres use a floor-divided half extent, then everything is
    # expressed relative to the image size.
    box_w = xmax - xmin
    box_h = ymax - ymin
    xs = (xmin + box_w // 2) / width
    ws = box_w / width
    ys = (ymin + box_h // 2) / height
    hs = box_h / height

    targets = []
    for cls, cx, w, cy, h, hot in zip(label, xs, ws, ys, hs, one_hot_label):
        targets.append({
            'label': cls,
            'x': cx,
            'w': w,
            'y': cy,
            'h': h,
            'one_hot_label': hot,
        })
    return img, targets
def __call__(self, in_data):
    """Run a six-stage augmentation pipeline and encode the result.

    Stages, in order: color distortion, optional random expansion, random
    cropping, resizing to ``(self.size, self.size)``, random horizontal
    flip and random vertical flip.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``: the augmented image with
        ``self.mean`` subtracted, and the two outputs of
        ``self.coder.encode``.
    """
    image, boxes, labels = in_data
    out_size = (self.size, self.size)

    # 1. Color augmentation
    image = random_distort(image)

    # 2. Random expansion (taken when the drawn integer is non-zero)
    if np.random.randint(2):
        image, shift = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes, y_offset=shift['y_offset'], x_offset=shift['x_offset'])

    # 3. Random cropping, then the same window is applied to the boxes
    image, window = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, survivors = transforms.crop_bbox(
        boxes,
        y_slice=window['y_slice'],
        x_slice=window['x_slice'],
        allow_outside_center=False,
        return_param=True)
    labels = labels[survivors['index']]

    # 4. Resizing with random interpolation
    _, in_h, in_w = image.shape
    boxes = transforms.resize_bbox(boxes, (in_h, in_w), out_size)
    image = resize_with_random_interpolation(image, out_size)

    # 5. Random horizontal flipping
    image, h_flip = transforms.random_flip(
        image, x_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, out_size, x_flip=h_flip['x_flip'])

    # 6. Random vertical flipping
    image, v_flip = transforms.random_flip(
        image, y_random=True, return_param=True)
    boxes = transforms.flip_bbox(boxes, out_size, y_flip=v_flip['y_flip'])

    # Preparation for SSD network
    image -= self.mean
    mb_loc, mb_label = self.coder.encode(boxes, labels)
    return image, mb_loc, mb_label
def __call__(self, in_data):
    """Augment a sample in five steps and encode it for the SSD network.

    Steps: color augmentation, random expansion, random cropping,
    resizing with random interpolation, random horizontal flipping.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``: the augmented image with
        ``self.mean`` subtracted, and the two outputs of
        ``self.coder.encode``.
    """
    sample_img, sample_bbox, sample_label = in_data
    side = self.size

    # 1. Color augmentation
    #    (original note: combines brightness, contrast, saturation and hue)
    sample_img = random_distort(sample_img)

    # 2. Random expansion
    if np.random.randint(2):
        # Original note: the input image is placed at various positions on
        # a canvas to produce images of various ratios; the bounding boxes
        # are updated with the reported offsets.
        sample_img, offsets = transforms.random_expand(
            sample_img, fill=self.mean, return_param=True)
        sample_bbox = transforms.translate_bbox(
            sample_bbox,
            y_offset=offsets['y_offset'],
            x_offset=offsets['x_offset'])

    # 3. Random cropping
    sample_img, crop = random_crop_with_bbox_constraints(
        sample_img, sample_bbox, return_param=True)
    # Adjust the bounding boxes so they fit inside the cropped image.
    sample_bbox, retained = transforms.crop_bbox(
        sample_bbox,
        y_slice=crop['y_slice'],
        x_slice=crop['x_slice'],
        allow_outside_center=False,
        return_param=True)
    sample_label = sample_label[retained['index']]

    # 4. Resizing with random interpolation
    #    Resize both the image and the bounding boxes.
    _, old_h, old_w = sample_img.shape
    sample_bbox = transforms.resize_bbox(
        sample_bbox, (old_h, old_w), (side, side))
    sample_img = resize_with_random_interpolation(sample_img, (side, side))

    # 5. Random horizontal flipping
    #    Flip the image and the bounding boxes together.
    sample_img, flip = transforms.random_flip(
        sample_img, x_random=True, return_param=True)
    sample_bbox = transforms.flip_bbox(
        sample_bbox, (side, side), x_flip=flip['x_flip'])

    # Preparation for input to the SSD network
    sample_img -= self.mean
    # Original note: produces loc (offsets and scales relative to the
    # default bounding boxes) and mb_label (array of classes).
    mb_loc, mb_label = self.coder.encode(sample_bbox, sample_label)
    return sample_img, mb_loc, mb_label
def test_crop_bbox_disallow_outside_center(self):
    """crop_bbox with allow_outside_center=False yields the expected boxes and indices."""
    want_bbox = np.array(
        [
            [0, 0, 2, 4],
            [0, 0, 4, 4],
            [0, 2, 2, 4],
        ],
        dtype=np.float32,
    )
    want_index = (0, 1, 3)

    got_bbox, got_param = crop_bbox(
        self.bbox,
        y_slice=self.y_slice,
        x_slice=self.x_slice,
        allow_outside_center=False,
        return_param=True,
    )

    np.testing.assert_equal(got_bbox, want_bbox)
    np.testing.assert_equal(got_param['index'], want_index)
def test_crop_bbox_disallow_outside_center(self):
    """Check the cropped boxes and kept indices when outside centers are disallowed."""
    crop_kwargs = {
        'y_slice': self.y_slice,
        'x_slice': self.x_slice,
        'allow_outside_center': False,
        'return_param': True,
    }
    actual_bbox, actual_param = crop_bbox(self.bbox, **crop_kwargs)

    # Expected boxes are built without an explicit dtype.
    reference = np.array([
        [0, 0, 2, 4],
        [0, 0, 4, 4],
        [0, 2, 2, 4],
    ])
    np.testing.assert_equal(actual_bbox, reference)
    np.testing.assert_equal(actual_param['index'], (0, 1, 3))
def test_crop_bbox(self):
    """crop_bbox with default options yields the expected boxes and indices."""
    want_bbox = np.array(
        [
            [0, 0, 2, 4],
            [0, 0, 4, 4],
            [0, 2, 2, 4],
            [2, 3, 3, 4],
        ],
        dtype=np.float32,
    )
    want_index = (0, 1, 3, 4)

    got_bbox, got_param = crop_bbox(
        self.bbox,
        y_slice=self.y_slice,
        x_slice=self.x_slice,
        return_param=True,
    )

    np.testing.assert_equal(got_bbox, want_bbox)
    np.testing.assert_equal(got_param['index'], want_index)
def test_crop_bbox(self):
    """Check the cropped boxes and kept indices for a default crop_bbox call."""
    crop_kwargs = {
        'y_slice': self.y_slice,
        'x_slice': self.x_slice,
        'return_param': True,
    }
    actual_bbox, actual_param = crop_bbox(self.bbox, **crop_kwargs)

    # Expected boxes are built without an explicit dtype.
    reference = np.array([
        [0, 0, 2, 4],
        [0, 0, 4, 4],
        [0, 2, 2, 4],
        [2, 3, 3, 4],
    ])
    np.testing.assert_equal(actual_bbox, reference)
    np.testing.assert_equal(actual_param['index'], (0, 1, 3, 4))
def __call__(self, in_data):
    """Augment a sample and encode it, tolerating samples without boxes.

    The boxes are first converted to a float32 array. If there are none, a
    ``RuntimeWarning`` is issued, the image is only resized, and the
    (empty) boxes are encoded as-is. Otherwise the five augmentation steps
    run: color augmentation, random expansion, random cropping, resizing
    with random interpolation and random horizontal flipping.

    No mean subtraction is performed in this method; ``self.mean`` is only
    used as the fill value for the expansion.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)`` where the last two values come
        from ``self.coder.encode``.
    """
    img, bbox, label = in_data
    bbox = np.array(bbox)
    bbox = bbox.astype(np.float32)
    out_hw = (self.size, self.size)

    # Early exit: nothing to augment against when there are no boxes.
    if len(bbox) == 0:
        warnings.warn("No bounding box detected", RuntimeWarning)
        resized = resize_with_random_interpolation(img, out_hw)
        mb_loc, mb_label = self.coder.encode(bbox, label)
        return resized, mb_loc, mb_label

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, bbox_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[bbox_param['index']]

    # 4. Resizing with random interpolation
    _, prev_h, prev_w = img.shape
    bbox = transforms.resize_bbox(bbox, (prev_h, prev_w), out_hw)
    img = resize_with_random_interpolation(img, out_hw)

    # 5. Random horizontal flipping
    img, flip_param = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_hw, x_flip=flip_param['x_flip'])

    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Apply five augmentation steps to a sample and encode it for SSD.

    Steps: color augmentation, random expansion, random cropping, resizing
    with random interpolation, random horizontal flipping.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``: the augmented image with
        ``self.mean`` subtracted, and the two outputs of
        ``self.coder.encode``.
    """
    picture, rects, classes = in_data
    final_shape = (self.size, self.size)

    # 1. Color augmentation
    picture = random_distort(picture)

    # 2. Random expansion
    expand = np.random.randint(2)
    if expand:
        picture, placement = transforms.random_expand(
            picture, fill=self.mean, return_param=True)
        rects = transforms.translate_bbox(
            rects,
            y_offset=placement['y_offset'],
            x_offset=placement['x_offset'])

    # 3. Random cropping
    picture, region = random_crop_with_bbox_constraints(
        picture, rects, return_param=True)
    rects, selection = transforms.crop_bbox(
        rects,
        y_slice=region['y_slice'],
        x_slice=region['x_slice'],
        allow_outside_center=False,
        return_param=True)
    # Labels follow the boxes that crop_bbox kept.
    classes = classes[selection['index']]

    # 4. Resizing with random interpolation
    _, cropped_h, cropped_w = picture.shape
    rects = transforms.resize_bbox(
        rects, (cropped_h, cropped_w), final_shape)
    picture = resize_with_random_interpolation(picture, final_shape)

    # 5. Random horizontal flipping
    picture, mirror = transforms.random_flip(
        picture, x_random=True, return_param=True)
    rects = transforms.flip_bbox(
        rects, final_shape, x_flip=mirror['x_flip'])

    # Preparation for SSD network
    picture -= self.mean
    mb_loc, mb_label = self.coder.encode(rects, classes)
    return picture, mb_loc, mb_label
def __call__(self, in_data):
    """Distort, expand, crop, resize and flip a sample, then encode it.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, mb_loc, mb_label)``: the transformed image with
        ``self.mean`` subtracted, and the two outputs of
        ``self.coder.encode``.
    """
    img, bbox, label = in_data
    dst_size = (self.size, self.size)

    # Color distortion.
    img = random_distort(img)

    # Optional expansion; boxes are translated by the reported offsets.
    if np.random.randint(2):
        img, moved = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        dy, dx = moved['y_offset'], moved['x_offset']
        bbox = transforms.translate_bbox(bbox, y_offset=dy, x_offset=dx)

    # Constrained random crop, mirrored onto the boxes and labels.
    img, cut = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    rows, cols = cut['y_slice'], cut['x_slice']
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=rows,
        x_slice=cols,
        allow_outside_center=False,
        return_param=True)
    label = label[kept['index']]

    # Resize image and boxes to the square output size.
    _, before_h, before_w = img.shape
    bbox = transforms.resize_bbox(bbox, (before_h, before_w), dst_size)
    img = resize_with_random_interpolation(img, dst_size)

    # Random horizontal flip applied consistently to image and boxes.
    img, flipped = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, dst_size, x_flip=flipped['x_flip'])

    # Mean subtraction (in place) and target encoding.
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one sample and pack fixed-size targets for the network.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.
            Per the original documentation: ``img`` has shape (3, H, W)
            with values in [0, 255]; ``bbox`` has shape (N, 4) ordered
            (y_min, x_min, y_max, x_max); ``label`` holds the class of
            each bounding box.

    Returns:
        Tuple ``(img, out_bbox, out_label, gmap, num)``:

        * ``img``: augmented image, divided by 255 and clipped to [0, 1].
        * ``out_bbox``: float array with ``self.max_target`` rows and 4
          columns; boxes are divided by ``self.output_shape`` and
          zero-padded.
        * ``out_label``: int array of length ``self.max_target``,
          zero-padded.
        * ``gmap``: first ``self.max_target`` entries of the result of
          ``create_map_anchor_gt``.
        * ``num``: one-element int array holding
          ``min(number of boxes, self.max_target)``.

    Side effects:
        Increments ``self.count`` on every call and, on the calls selected
        by the condition below, advances ``self.i`` and replaces
        ``self.output_shape`` with the next entry of ``self.dim``.
    """
    # Augmentation outline (from the original header):
    # 1. Color augmentation
    # 2. Random expansion
    # 3. Random cropping
    # 4. Resizing with random interpolation
    # 5. Random horizontal flipping

    # Switch to the next square network size when the call counter is a
    # non-zero multiple of both 10 and self.batchsize.
    if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
        self.i += 1
        i = self.i % len(self.dim)
        self.output_shape = (self.dim[i], self.dim[i])
    self.count += 1

    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(img, brightness_delta=32, contrast_low=0.5, contrast_high=1.5, saturation_low=0.5, saturation_high=1.5, hue_delta=25)

    # Normalize in place so the range becomes [0, 1].
    # NOTE(review): in-place division assumes random_distort returned a
    # floating-point array -- confirm.
    img /= 255.0

    _, H, W = img.shape
    scale = np.random.uniform(0.25, 2)
    # NOTE(review): `random_expand` is never read below, but the call still
    # consumes two random draws; removing it would change the RNG stream.
    random_expand = np.random.uniform(0.8, 1.2, 2)
    net_h, net_w = self.output_shape
    out_h = net_h * scale  # random_expand[0]
    out_w = net_w * scale  # random_expand[1]

    # For non-square inputs, derive the shorter side from the longer one
    # using the input aspect ratio with a random jitter factor in [0.8, 1.2).
    if H > W:
        out_w = out_h * (float(W) / H) * np.random.uniform(0.8, 1.2)
    elif H < W:
        out_h = out_w * (float(H) / W) * np.random.uniform(0.8, 1.2)
    out_h = int(out_h)
    out_w = int(out_w)

    img = resize_with_random_interpolation(img, (out_h, out_w))
    bbox = transforms.resize_bbox(bbox, (H, W), (out_h, out_w))

    if out_h < net_h and out_w < net_w:
        # Resized image is smaller than the network input in both
        # dimensions: expand it to exactly the network size.
        img, param = expand(img, out_h=net_h, out_w=net_w, fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
    else:
        # At least one side reaches the network size: a side below the
        # network size is expanded to it, any other side is enlarged by
        # 5 %, and the result is cropped to the network size.
        out_h = net_h if net_h > out_h else int(out_h * 1.05)
        out_w = net_w if net_w > out_w else int(out_w * 1.05)
        img, param = expand(img, out_h=out_h, out_w=out_w, fill=self.value, return_param=True)
        bbox = transforms.translate_bbox(bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
        img, param = crop_with_bbox_constraints(img, bbox, return_param=True, crop_height=net_h, crop_width=net_w)
        bbox, param = transforms.crop_bbox(bbox, y_slice=param['y_slice'], x_slice=param['x_slice'], allow_outside_center=False, return_param=True)
        # Keep only the labels of the boxes that crop_bbox retained.
        label = label[param['index']]

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, self.output_shape, x_flip=params['x_flip'])

    # Preparation for the YOLOv2 network: scale boxes to [0, 1] (in place).
    bbox[:, ::2] /= self.output_shape[0]  # y coordinates (columns 0 and 2)
    bbox[:, 1::2] /= self.output_shape[1]  # x coordinates (columns 1 and 3)

    # Pad to at least self.max_target rows, then truncate to exactly that
    # many, so the outputs have a fixed length.
    num_bbox = len(bbox)
    len_max = max(num_bbox, self.max_target)
    out_bbox = np.zeros((len_max, 4), dtype='f')
    out_bbox[:num_bbox] = bbox[:num_bbox]
    out_label = np.zeros((len_max), dtype='i')
    out_label[:num_bbox] = label
    out_bbox = out_bbox[:self.max_target]
    out_label = out_label[:self.max_target]
    # Number of valid (non-padding) entries in the truncated outputs.
    num_array = min(num_bbox, self.max_target)

    gmap = create_map_anchor_gt(bbox, self.anchors, self.output_shape, self.downscale, self.n_boxes, len_max)
    gmap = gmap[:self.max_target]

    img = np.clip(img, 0, 1)
    return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')
def __call__(self, in_data):
    """Augment a sample and prepare it with ``self.faster_rcnn.prepare``.

    The image is color-distorted, randomly rotated, optionally expanded,
    randomly cropped, prepared by the model and finally flipped at random
    along both axes. Bounding boxes follow every geometric step.

    If cropping removes every bounding box, the method falls back to the
    untouched input sample (no distortion, rotation, expansion or crop)
    before the prepare/flip steps.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label)``.

    Returns:
        Tuple ``(img, bbox, label, scale)`` where ``scale`` is the ratio of
        the prepared image height to the height just before preparation.
    """
    img, bbox, label = in_data
    _, src_h, src_w = img.shape

    # Random brightness and contrast.
    img = random_distort(img)

    # Random rotation; the returned 'k' is forwarded to rotate_bbox together
    # with the pre-rotation size (per the original comment, 'k' counts
    # rotations by 90 degrees).
    img, rot = transforms.random_rotate(img, return_param=True)
    bbox = rotate_bbox(bbox, (src_h, src_w), rot['k'])

    # Random expansion onto a larger canvas (ratio at most 2), filled with
    # the per-channel mean of the current image.
    if np.random.randint(2):
        channel_mean = img.mean(axis=1).mean(axis=1)
        canvas_fill = channel_mean.reshape(-1, 1, 1)
        img, shift = transforms.random_expand(
            img, max_ratio=2, fill=canvas_fill, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=shift['y_offset'], x_offset=shift['x_offset'])

    # Random crop with bounding-box constraints.
    img, window = random_crop_with_bbox_constraints(
        img, bbox, min_scale=0.5, max_aspect_ratio=1.5, return_param=True)
    # Boxes are translated into the crop; allow_outside_center=False is
    # passed, which the original comment describes as removing boxes whose
    # centers fall outside the cropped area.
    bbox, kept = transforms.crop_bbox(
        bbox,
        y_slice=window['y_slice'],
        x_slice=window['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[kept['index']]

    # Fall back to the original sample when no box survived.
    if bbox.shape[0] == 0:
        img, bbox, label = in_data

    # Size right before the model's own preprocessing.
    _, pre_h, pre_w = img.shape
    img = self.faster_rcnn.prepare(img)
    _, post_h, post_w = img.shape
    bbox = transforms.resize_bbox(bbox, (pre_h, pre_w), (post_h, post_w))

    # Random horizontal and vertical flips, applied to image and boxes.
    img, flips = transforms.random_flip(
        img, x_random=True, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (post_h, post_w),
        x_flip=flips['x_flip'], y_flip=flips['y_flip'])

    scale = post_h / pre_h
    return img, bbox, label, scale
def __call__(self, in_data):
    """Apply the augmentations enabled in ``self.options`` to one sample.

    Steps, each controlled by a key of ``self.options``:
    ``x_flip`` (random horizontal flip), ``rotate90`` (random quarter
    turn, square images only), ``zoom`` (random zoom in/out keeping the
    image size), ``scale`` (random rescale, requires ``crop``), ``crop``
    (random crop that trims a margin derived from ``32 * self.downscale``),
    ``window_width`` (intensity stretch around 128) and ``contrast``
    (random additive offset). Intensities are finally clipped to
    [0, 255].

    The caller's ``image`` and ``bbox`` arrays are never written to: the
    in-place updates below operate on copies.

    Args:
        in_data: Sequence unpacked as ``(image, label, bbox, mask)``.
            ``image`` must be three-dimensional. The zoom-out step builds
            single-channel float32 canvases, so it presumably expects
            single-channel images and masks -- confirm against the caller.

    Returns:
        Tuple ``(image, label, bbox, mask)``. ``label`` is returned
        unchanged.

    Raises:
        AssertionError: if ``rotate90`` is set for a non-square image, if
            ``zoom`` and ``scale`` are both enabled, or if ``scale`` is
            enabled without ``crop``.
    """
    image, label, bbox, mask = in_data
    _, H, W = image.shape
    cell_size = 32 * self.downscale

    # Horizontal flip (the random draw happens only when the option is on).
    if self.options['x_flip'] and np.random.randint(2) == 0:
        image = transforms.flip(image, x_flip=True)
        bbox = transforms.flip_bbox(bbox, (H, W), x_flip=True)
        mask = transforms.flip(mask, x_flip=True)

    # Random rotation (90 or 270 degrees)
    if self.options['rotate90']:
        assert H == W, 'Height and width must match when `rotate90` is set.'
        if np.random.randint(2) == 0:  # Rotate?
            # A second draw picks one quarter turn (k=1) or three (k=3).
            k = 1 if np.random.randint(2) == 0 else 3
            bbox = rotate_bbox(bbox, k, (H, W))
            image = np.rot90(image, k, axes=(1, 2))
            mask = np.rot90(mask, k, axes=(1, 2))
    _, H, W = image.shape

    # Zoom in / zoom out (log-uniform factor in [1/zoom, zoom])
    if self.options['zoom'] > 1:
        assert self.options[
            'scale'] <= 1.0, "`scale` shouldn't be set if `zoom` is set."
        max_log_zoom = np.log(self.options['zoom'])
        log_zoom = np.random.random() * 2 * max_log_zoom - max_log_zoom
        zoom = np.exp(log_zoom)
        if zoom > 1:
            # Zoom in: crop a random window and resize it back to (H, W).
            y_size, x_size = int(H / zoom), int(W / zoom)
            y_offset = np.random.randint(H - y_size + 1)
            x_offset = np.random.randint(W - x_size + 1)
            y_slice = slice(y_offset, y_offset + y_size)
            x_slice = slice(x_offset, x_offset + x_size)
            bbox = transforms.crop_bbox(bbox, y_slice, x_slice)
            bbox *= zoom
            image = transforms.resize(image[:, y_slice, x_slice], (H, W))
            mask = transforms.resize(mask[:, y_slice, x_slice], (H, W),
                                     interpolation=Image.NEAREST)
        elif zoom < 1:
            # Zoom out: paste onto a larger zero canvas, resize to (H, W).
            y_size, x_size = int(H / zoom), int(W / zoom)
            y_offset = np.random.randint(y_size - H + 1)
            x_offset = np.random.randint(x_size - W + 1)
            bbox = transforms.translate_bbox(bbox, y_offset, x_offset)
            new_image = np.zeros((1, y_size, x_size), dtype=np.float32)
            new_image[:, y_offset:y_offset + H,
                      x_offset:x_offset + W] = image
            new_mask = np.zeros((1, y_size, x_size), dtype=np.float32)
            new_mask[:, y_offset:y_offset + H,
                     x_offset:x_offset + W] = mask
            bbox *= zoom
            image = transforms.resize(new_image, (H, W))
            mask = transforms.resize(new_mask, (H, W),
                                     interpolation=Image.NEAREST)

    # Random scale (log-uniform factor in [1/scale, scale])
    if self.options['scale'] > 1.0:
        assert self.options[
            'crop'], '`crop` must be set if `scale` is set.'
        max_log_scale = np.log(self.options['scale'])
        log_scale = np.random.random() * 2 * max_log_scale - max_log_scale
        scale = np.exp(log_scale)
        scaled_size = (int(H * scale), int(W * scale))
        image = transforms.resize(image, scaled_size)
        mask = transforms.resize(mask, scaled_size,
                                 interpolation=Image.NEAREST)
        _, H, W = image.shape
        # `bbox` can still be the caller's array here (no flip/rotation
        # happened and zoom is excluded by the assert above), so scale a
        # copy instead of writing into the input.
        bbox = bbox.copy()
        bbox *= scale

    # Random crop: trims a margin of 1..cell_size pixels per axis so the
    # remaining size is a multiple of cell_size, at a random offset.
    if self.options['crop']:
        y_margin = (H - 1) % cell_size + 1
        x_margin = (W - 1) % cell_size + 1
        y_offset = np.random.randint(y_margin)
        x_offset = np.random.randint(x_margin)
        y_size = H - y_margin
        x_size = W - x_margin
        y_slice = slice(y_offset, y_offset + y_size)
        x_slice = slice(x_offset, x_offset + x_size)
        image = image[:, y_slice, x_slice]
        # NOTE(review): crop_bbox may drop boxes while `label` is left
        # untouched -- confirm whether `label` is per-box and needs the
        # same filtering.
        bbox = transforms.crop_bbox(bbox, y_slice, x_slice)
        mask = mask[:, y_slice, x_slice]

    # Change window width (stretch intensities around 128)
    if self.options['window_width'] > 1.0:
        image = (image - 128) * self.options['window_width'] + 128

    # Change contrast (random offset in [-contrast, +contrast])
    if self.options['contrast']:
        offset = (np.random.randint(self.options['contrast'] * 2 + 1)
                  - self.options['contrast'])
        # `image` may be the caller's array or a view of it (basic slicing
        # and np.rot90 return views), so add the offset to a copy.
        image = image.copy()
        image += offset

    image = np.clip(image, 0, 255)
    return image, label, bbox, mask
def __call__(self, in_data):
    """Augment an image/box/label sample with an optional mask for SSD.

    Five augmentation steps are applied to the image and boxes, and each
    geometric step is mirrored on the mask when one is given:
    color augmentation, random expansion, random cropping, resizing with
    random interpolation, and random horizontal flipping.

    Args:
        in_data: Sequence unpacked as ``(img, bbox, label, mask)``.
            ``mask`` may be ``None``; every mask step is then skipped and
            a ``(self.size, self.size)`` int32 array filled with -1 is
            returned in its place.

    Returns:
        Tuple ``(img, mb_loc, mb_label, mask)``: the augmented image with
        ``self.mean`` subtracted, the two outputs of
        ``self.coder.encode``, and the mask as an int32 array.

    Raises:
        RuntimeError: if the mask is empty after cropping.
    """
    img, bbox, label, mask = in_data
    out_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
        if mask is not None:
            # The mask helper also receives the expanded canvas size.
            _, new_height, new_width = img.shape
            param['new_height'] = new_height
            param['new_width'] = new_width
            mask = self._random_expand_mask(mask, param)

    # 3. Random cropping
    img, param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    # Guarded like every other mask step so a missing mask is skipped
    # rather than handed to the crop helper.
    if mask is not None:
        mask = self._fixed_crop_mask(
            mask, param['y_slice'], param['x_slice'])
    bbox, param = transforms.crop_bbox(
        bbox, y_slice=param['y_slice'], x_slice=param['x_slice'],
        allow_outside_center=False, return_param=True)
    label = label[param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    img = resize_with_random_interpolation(img, out_size)
    if mask is not None:
        if mask.size == 0:
            raise RuntimeError('mask is empty after cropping')
        mask = self._resize_with_nearest(mask, out_size)
    bbox = transforms.resize_bbox(bbox, (H, W), out_size)

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=params['x_flip'])
    if mask is not None:
        mask = self._random_flip_mask(
            mask, x_flip=params['x_flip'], y_flip=params['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    if mask is None:
        # Placeholder returned when the sample has no mask.
        mask = np.full((self.size, self.size), -1, dtype=np.int32)
    # Always hand back int32; no copy is made when it already is int32.
    mask = mask.astype(np.int32, copy=False)

    return img, mb_loc, mb_label, mask