def preprocess(image, joint, camera, param):
    """Run the hand-sample augmentation pipeline on one sample.

    Steps, in order: rotation, optional joint noise, crop, scale, flip,
    resize-contain and optional colour distortion.

    Args:
        image: Input image, threaded through every helper.
        joint: Joint coordinates; must expose ``.shape`` when noise is
            enabled.
        camera: Camera description updated by the geometric helpers.
        param: Mapping providing ``"angle"``, ``"add_noise"``,
            ``"oscillation"``, ``"crop2d"``, ``"y_flip"``, ``"x_flip"`` and
            ``"do_color_augmentation"``.

    Returns:
        tuple: ``(image, joint, camera, domain)`` where ``domain`` is the
        third value returned by ``crop_handwith2d``.
    """
    logger.debug("rotate")
    # random rotate
    image, joint = rotate_hand(image, joint, camera, angle=param["angle"])

    if param["add_noise"]:
        # Perturb the joints with zero-mean, unit-variance Gaussian noise.
        noise = np.random.normal(0, 1, joint.shape)
        joint = joint + noise

    # crop
    logger.debug("crop")
    image, camera, domain = crop_handwith2d(
        image,
        joint,
        camera,
        param["oscillation"],
    )

    logger.debug("scale")
    image, camera = scale(image, camera, param["crop2d"])

    logger.debug("flip")
    # random flip
    image, joint = flip_hand(
        image,
        joint,
        camera,
        y_flip=param["y_flip"],
        x_flip=param["x_flip"],
    )

    # The original logged "flip" a second time here; label the step that
    # actually runs so debug traces are not misleading.
    logger.debug("resize")
    # scale image so that target image contains a specific domain
    image, camera = resize_contain(image, camera, size=param["crop2d"], fill=0)

    if param["do_color_augmentation"]:
        logger.debug("distort image")
        image = random_distort(image)

    return image, joint, camera, domain
def __getitem__(self, _idx):
    """Return one augmented sample for index ``_idx``.

    The parent dataset's sample is cropped via ``self._gen_trans``, mirrored
    horizontally when ``random.random() <= 0.5`` and colour-distorted.

    Args:
        _idx: Index forwarded to the parent ``__getitem__``.

    Returns:
        tuple: ``(image, uv, side)`` where ``image`` is the crop after
        ``transpose((2, 0, 1))`` as float32, ``uv`` is the transformed 2D
        skeleton divided by ``self.config.input_size`` as float32, and
        ``side`` is the side flag as ``np.uint8``.
    """
    # Kept function-local, as in the original block.
    from chainercv.links.model.ssd import random_distort

    # The first element (camera) is not used by this method.
    _, _img, _skel_2d, _skel3d, _side = super(DataPose, self).__getitem__(_idx)

    _bbox = self._getbbox(_skel_2d)
    _target = self._gettarget()
    _trans, _crop = self._gen_trans(_bbox, _target, _img)

    if random.random() <= 0.5:
        # Horizontal flip: mirror x of the 2D skeleton about the image width,
        # negate x of the 3D skeleton and swap the side flag.
        # NOTE(review): both skeleton arrays are modified in place; if the
        # parent returns cached arrays the flip would persist - confirm.
        _skel_2d[:, 0] = _img.shape[1] - _skel_2d[:, 0]
        _skel3d[:, 0] = -_skel3d[:, 0]
        _side = 1 - _side
        # Recompute the transform for the mirrored skeleton; the crop itself
        # is mirrored by reversing its second axis.
        _bbox = self._getbbox(_skel_2d)
        _trans, _ = self._gen_trans(_bbox, _target, None)
        _crop = _crop[:, ::-1, :]

    _uv2d = self._get_point2d(_skel_2d, _trans)

    # Move the last axis to the front before colour distortion.
    _crop_img = random_distort(_crop.transpose((2, 0, 1)))

    return (
        np.array(_crop_img, dtype=np.float32),
        np.array(_uv2d / self.config.input_size, dtype=np.float32),
        np.uint8(_side),
    )
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it.

    Applies colour distortion, an expansion taken when
    ``np.random.randint(2)`` is non-zero, a bbox-constrained random crop and
    a resize to ``(self.size, self.size)``. ``self.mean`` is then subtracted
    in place and the boxes/labels are passed to ``self.coder.encode``.

    Args:
        in_data: Tuple ``(img, bbox, label)``; ``img`` must be 3-D.

    Returns:
        tuple: ``(img, mb_loc, mb_lab)``.
    """
    image, boxes, labels = in_data

    # 1. Colour augmentation
    image = random_distort(image)

    # 2. Random expansion; boxes are shifted by the same offsets
    if np.random.randint(2):
        image, expand_info = transforms.random_expand(
            image, fill=self.mean, return_param=True)
        boxes = transforms.translate_bbox(
            boxes,
            y_offset=expand_info["y_offset"],
            x_offset=expand_info["x_offset"])

    # 3. Random cropping; labels follow the boxes that survive the crop
    image, crop_info = random_crop_with_bbox_constraints(
        image, boxes, return_param=True)
    boxes, kept = transforms.crop_bbox(
        boxes,
        y_slice=crop_info["y_slice"],
        x_slice=crop_info["x_slice"],
        allow_outside_center=False,
        return_param=True)
    labels = labels[kept["index"]]

    # 4. Resizing with random interpolation
    _, src_h, src_w = image.shape
    image = resize_with_random_interpolation(image, (self.size, self.size))
    boxes = transforms.resize_bbox(
        boxes, (src_h, src_w), (self.size, self.size))

    # 5. Transformation for SSD network input
    image -= self.mean
    mb_loc, mb_lab = self.coder.encode(boxes, labels)

    return image, mb_loc, mb_lab
def test_random_distort(self):
    """Check that ``random_distort`` preserves input shape and dtype."""
    if not optional_modules:
        # Report the case as skipped instead of silently passing, so a
        # missing optional dependency is visible in the test summary.
        self.skipTest("optional modules required by random_distort are missing")

    img = np.random.randint(0, 256, size=(3, 48, 32)).astype(np.float32)

    out = random_distort(img)

    self.assertEqual(out.shape, img.shape)
    self.assertEqual(out.dtype, img.dtype)
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it.

    Six augmentation steps are applied:

    1. Color augmentation
    2. Random expansion
    3. Random cropping
    4. Resizing with random interpolation
    5. Random horizontal flipping
    6. Random vertical flipping

    Afterwards ``self.mean`` is subtracted in place and the result is
    passed to ``self.coder.encode``.

    Args:
        in_data: Tuple ``(img, bbox, label)``; ``img`` must be 3-D.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``.
    """
    img, bbox, label = in_data
    out_size = (self.size, self.size)

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, expand_param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox,
            y_offset=expand_param['y_offset'],
            x_offset=expand_param['x_offset'])

    # 3. Random cropping
    img, crop_param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, bbox_param = transforms.crop_bbox(
        bbox,
        y_slice=crop_param['y_slice'],
        x_slice=crop_param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[bbox_param['index']]

    # 4. Resizing with random interpolation
    _, in_h, in_w = img.shape
    img = resize_with_random_interpolation(img, out_size)
    bbox = transforms.resize_bbox(bbox, (in_h, in_w), out_size)

    # 5. Random horizontal flipping
    img, h_flip = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, x_flip=h_flip['x_flip'])

    # 6. Random vertical flipping
    img, v_flip = transforms.random_flip(
        img, y_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, out_size, y_flip=v_flip['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample in five steps and encode it.

    1. Color augmentation
    2. Random expansion
    3. Random cropping
    4. Resizing with random interpolation
    5. Random horizontal flipping

    Args:
        in_data: Tuple ``(img, bbox, label)``; ``img`` must be 3-D.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``.
    """
    img, bbox, label = in_data

    # 1. Color augmentation
    # Augment by combining brightness, contrast, saturation and hue changes.
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        # Place the input image at a random position on a larger canvas to
        # produce images of various ratios, then update the bounding boxes.
        img, canvas = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=canvas['y_offset'], x_offset=canvas['x_offset'])

    # 3. Random cropping
    img, window = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    # Adjust the bounding boxes so they lie inside the cropped image.
    bbox, survivors = transforms.crop_bbox(
        bbox,
        y_slice=window['y_slice'],
        x_slice=window['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[survivors['index']]

    # 4. Resizing with random interpolation
    # Resize both the image and the bounding boxes.
    _, height, width = img.shape
    side = self.size
    img = resize_with_random_interpolation(img, (side, side))
    bbox = transforms.resize_bbox(bbox, (height, width), (side, side))

    # 5. Random horizontal flipping
    # Flip the image and the bounding boxes horizontally at random.
    img, flip = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, (side, side), x_flip=flip['x_flip'])

    # Preparation for feeding the SSD network
    img -= self.mean
    # Produce mb_loc (offsets and scales relative to the default bounding
    # boxes) and mb_label (array of classes) for the SSD input.
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it.

    ``bbox`` is first converted to a float32 array. When it is empty a
    ``RuntimeWarning`` is issued and the image is only resized before
    encoding. Otherwise five augmentation steps run:

    1. Color augmentation
    2. Random expansion
    3. Random cropping
    4. Resizing with random interpolation
    5. Random horizontal flipping

    No mean subtraction is performed on the returned image.

    Args:
        in_data: Tuple ``(img, bbox, label)``; ``img`` must be 3-D on the
            non-empty path.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``.
    """
    img, bbox, label = in_data
    bbox = np.array(bbox).astype(np.float32)

    if not len(bbox):
        # Nothing to constrain a crop with: resize only, then encode.
        warnings.warn("No bounding box detected", RuntimeWarning)
        img = resize_with_random_interpolation(img, (self.size, self.size))
        mb_loc, mb_label = self.coder.encode(bbox, label)
        return img, mb_loc, mb_label

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, shift = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=shift['y_offset'], x_offset=shift['x_offset'])

    # 3. Random cropping
    img, region = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, retained = transforms.crop_bbox(
        bbox,
        y_slice=region['y_slice'],
        x_slice=region['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[retained['index']]

    # 4. Resizing with random interpolation
    _, prev_h, prev_w = img.shape
    img = resize_with_random_interpolation(img, (self.size, self.size))
    bbox = transforms.resize_bbox(
        bbox, (prev_h, prev_w), (self.size, self.size))

    # 5. Random horizontal flipping
    img, flip_state = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (self.size, self.size), x_flip=flip_state['x_flip'])

    mb_loc, mb_label = self.coder.encode(bbox, label)
    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it.

    There are five data augmentation steps:

    1. Color augmentation
    2. Random expansion
    3. Random cropping
    4. Resizing with random interpolation
    5. Random horizontal flipping

    ``self.mean`` is then subtracted from the image in place and the boxes
    and labels are passed to ``self.coder.encode``.

    Args:
        in_data: Tuple ``(img, bbox, label)``; ``img`` must be 3-D.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``.
    """
    picture, rects, classes = in_data
    final_shape = (self.size, self.size)

    # 1. Color augmentation
    picture = random_distort(picture)

    # 2. Random expansion
    do_expand = np.random.randint(2)
    if do_expand:
        picture, placed = transforms.random_expand(
            picture, fill=self.mean, return_param=True)
        rects = transforms.translate_bbox(
            rects, y_offset=placed['y_offset'], x_offset=placed['x_offset'])

    # 3. Random cropping
    picture, cut = random_crop_with_bbox_constraints(
        picture, rects, return_param=True)
    rects, cut_result = transforms.crop_bbox(
        rects,
        y_slice=cut['y_slice'],
        x_slice=cut['x_slice'],
        allow_outside_center=False,
        return_param=True)
    classes = classes[cut_result['index']]

    # 4. Resizing with random interpolation
    _, before_h, before_w = picture.shape
    picture = resize_with_random_interpolation(picture, final_shape)
    rects = transforms.resize_bbox(rects, (before_h, before_w), final_shape)

    # 5. Random horizontal flipping
    picture, mirrored = transforms.random_flip(
        picture, x_random=True, return_param=True)
    rects = transforms.flip_bbox(
        rects, final_shape, x_flip=mirrored['x_flip'])

    # Preparation for SSD network
    picture -= self.mean
    mb_loc, mb_label = self.coder.encode(rects, classes)

    return picture, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one ``(img, bbox, label)`` sample and encode it.

    Pipeline: colour distortion, an expansion taken when
    ``np.random.randint(2)`` is non-zero, bbox-constrained random crop,
    resize to ``(self.size, self.size)``, random horizontal flip, in-place
    subtraction of ``self.mean`` and ``self.coder.encode``.

    Args:
        in_data: Tuple ``(img, bbox, label)``; ``img`` must be 3-D.

    Returns:
        tuple: ``(img, mb_loc, mb_label)``.
    """
    img, bbox, label = in_data

    # Photometric augmentation.
    img = random_distort(img)

    # Optional expansion; the boxes follow the image offsets.
    if np.random.randint(2):
        img, offsets = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=offsets['y_offset'], x_offset=offsets['x_offset'])

    # Crop, then keep only the labels of the boxes that remain.
    img, slices = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    bbox, selection = transforms.crop_bbox(
        bbox,
        y_slice=slices['y_slice'],
        x_slice=slices['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[selection['index']]

    # Resize image and boxes to the square network input size.
    _, cropped_h, cropped_w = img.shape
    img = resize_with_random_interpolation(img, (self.size, self.size))
    bbox = transforms.resize_bbox(
        bbox, (cropped_h, cropped_w), (self.size, self.size))

    # Random horizontal flip applied consistently to image and boxes.
    img, flipped = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (self.size, self.size), x_flip=flipped['x_flip'])

    # Mean subtraction (in place) and target encoding.
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    return img, mb_loc, mb_label
def __call__(self, in_data):
    """Augment one sample and build fixed-size YOLOv2 training targets.

    Args:
        in_data: Tuple ``(img, bbox, label)``.
            img (array): Shape is (3, H, W). Range is [0, 255].
            bbox (array): Shape is (N, 4), (y_min, x_min, y_max, x_max).
            label (array): Classes of the bounding boxes.

    Returns:
        tuple: ``(img, out_bbox, out_label, gmap, num)``.
            img: Image clipped to [0, 1]; expanded areas are filled with
                ``self.value``.
            out_bbox: float32 array with ``self.max_target`` rows holding
                the boxes divided by ``self.output_shape``, zero padded.
            out_label: int32 array of ``self.max_target`` labels, zero
                padded.
            gmap: Result of ``create_map_anchor_gt`` cut to
                ``self.max_target`` entries.
            num: One-element int32 array with
                ``min(len(bbox), self.max_target)``.
    """
    # Move on to the next entry of ``self.dim`` when the call counter is a
    # non-zero multiple of both 10 and the batch size.
    if self.count % 10 == 0 and self.count % self.batchsize == 0 and self.count != 0:
        self.i += 1
        dim_index = self.i % len(self.dim)
        self.output_shape = (self.dim[dim_index], self.dim[dim_index])
    self.count += 1

    img, bbox, label = in_data

    # 1. Color augmentation
    img = random_distort(
        img,
        brightness_delta=32,
        contrast_low=0.5,
        contrast_high=1.5,
        saturation_low=0.5,
        saturation_high=1.5,
        hue_delta=25,
    )

    # Normalize. Range is [0, 1]
    img /= 255.0

    _, src_h, src_w = img.shape
    scale = np.random.uniform(0.25, 2)
    # The drawn values are never used, but the call is kept so the sequence
    # of numbers taken from the global NumPy RNG stays unchanged.
    np.random.uniform(0.8, 1.2, 2)

    net_h, net_w = self.output_shape
    out_h = net_h * scale
    out_w = net_w * scale
    # Shrink the shorter side to follow the source aspect ratio, with jitter.
    if src_h > src_w:
        out_w = out_h * (float(src_w) / src_h) * np.random.uniform(0.8, 1.2)
    elif src_h < src_w:
        out_h = out_w * (float(src_h) / src_w) * np.random.uniform(0.8, 1.2)
    out_h = int(out_h)
    out_w = int(out_w)

    img = resize_with_random_interpolation(img, (out_h, out_w))
    bbox = transforms.resize_bbox(bbox, (src_h, src_w), (out_h, out_w))

    # When the resized image fits inside the network input on both axes it
    # is expanded straight to the network size. Otherwise it is expanded to
    # a canvas at least as large as the network input and cropped back.
    fits_inside = out_h < net_h and out_w < net_w
    if fits_inside:
        canvas_h, canvas_w = net_h, net_w
    else:
        canvas_h = net_h if net_h > out_h else int(out_h * 1.05)
        canvas_w = net_w if net_w > out_w else int(out_w * 1.05)

    img, placed = expand(
        img, out_h=canvas_h, out_w=canvas_w, fill=self.value,
        return_param=True)
    bbox = transforms.translate_bbox(
        bbox, y_offset=placed['y_offset'], x_offset=placed['x_offset'])

    if not fits_inside:
        img, window = crop_with_bbox_constraints(
            img, bbox, return_param=True,
            crop_height=net_h, crop_width=net_w)
        bbox, kept = transforms.crop_bbox(
            bbox,
            y_slice=window['y_slice'],
            x_slice=window['x_slice'],
            allow_outside_center=False,
            return_param=True)
        label = label[kept['index']]

    # 5. Random horizontal flipping
    img, flip = transforms.random_flip(img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(bbox, self.output_shape, x_flip=flip['x_flip'])

    # Preparation for Yolov2 network. scale=[0, 1]
    bbox[:, ::2] /= self.output_shape[0]  # y
    bbox[:, 1::2] /= self.output_shape[1]  # x

    # Pad (or later truncate) boxes and labels to ``self.max_target`` rows.
    num_bbox = len(bbox)
    len_max = max(num_bbox, self.max_target)

    out_bbox = np.zeros((len_max, 4), dtype='f')
    out_bbox[:num_bbox] = bbox[:num_bbox]
    out_label = np.zeros((len_max), dtype='i')
    out_label[:num_bbox] = label

    out_bbox = out_bbox[:self.max_target]
    out_label = out_label[:self.max_target]
    num_array = min(num_bbox, self.max_target)

    gmap = create_map_anchor_gt(
        bbox, self.anchors, self.output_shape,
        self.downscale, self.n_boxes, len_max)
    gmap = gmap[:self.max_target]

    img = np.clip(img, 0, 1)
    return img, out_bbox, out_label, gmap, np.array([num_array], dtype='i')
def _data_augumentation(self, image):
    """Return ``image`` after colour distortion and a random x-flip."""
    distorted = random_distort(image)
    return random_flip(distorted, x_random=True)
def __getitem__(self, index):
    """Load, augment, pad and resize one image together with its labels.

    The label file is read as rows of five numbers. Column 0 is used as
    the class id and columns 1-4 as centre x, centre y, width and height
    expressed as fractions of the image size.

    Args:
        index: Sample index; wrapped modulo ``len(self.img_files)``.

    Returns:
        tuple: ``(img_path, input_img, filled_labels)`` where ``input_img``
        is a float tensor in channels-first order and ``filled_labels`` is
        a ``(self.max_objects, 5)`` tensor, zero padded.

    Raises:
        Exception: If the label file does not exist.
    """
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = np.array(Image.open(img_path))
    h, w, _ = img.shape

    label_path = self.label_files[index % len(self.img_files)].rstrip()
    if not os.path.exists(label_path):
        raise Exception(
            "the label file(.txt) is not found corresponding + " + img_path)
    labels = np.loadtxt(label_path).reshape(-1, 5)

    # --------------------
    #  data augmentation
    # --------------------
    # Convert (cx, cy, w, h) fractions to pixel corner coordinates.
    lx = w * (labels[:, 1] - labels[:, 3] / 2)
    ly = h * (labels[:, 2] - labels[:, 4] / 2)
    bx = w * (labels[:, 1] + labels[:, 3] / 2)
    by = h * (labels[:, 2] + labels[:, 4] / 2)

    # convert to chainercv format: (ly, lx, by, bx)
    cv_bbox = np.stack([ly, lx, by, bx], axis=1)
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype and works on every NumPy version.
    cv_labels = labels[:, 0].reshape(-1).astype(int)
    cv_img = img.transpose(2, 0, 1)

    # 1. Random distort
    cv_img = random_distort(cv_img)

    # 2. Random cropping
    cv_img, param = random_crop_with_bbox_constraints(
        cv_img, cv_bbox, min_scale=0.3, return_param=True)
    cv_bbox, param = transforms.crop_bbox(
        cv_bbox,
        y_slice=param['y_slice'],
        x_slice=param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    cv_labels = cv_labels[param['index']]

    # 3. Random horizontal flipping
    _, _h, _w = cv_img.shape
    cv_img, params = transforms.random_flip(
        cv_img, x_random=True, return_param=True)
    cv_bbox = transforms.flip_bbox(cv_bbox, (_h, _w), x_flip=params['x_flip'])

    # update params
    img = cv_img.transpose(1, 2, 0)
    h, w, _ = img.shape

    # convert to default format: (cx, cy, w, h)
    labels = np.zeros((cv_labels.size, 5))
    labels[:, 0] = cv_labels
    labels[:, 1] = (cv_bbox[:, 3] + cv_bbox[:, 1]) / 2.0 / w  # cx
    labels[:, 2] = (cv_bbox[:, 2] + cv_bbox[:, 0]) / 2.0 / h  # cy
    labels[:, 3] = (cv_bbox[:, 3] - cv_bbox[:, 1]) / w  # w
    labels[:, 4] = (cv_bbox[:, 2] - cv_bbox[:, 0]) / h  # h

    # ---------
    #  image
    # ---------
    dim_diff = np.abs(h - w)
    # Upper (left) and lower (right) padding
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # Pad the shorter axis so the image becomes square.
    pad = (((pad1, pad2), (0, 0), (0, 0)) if h <= w
           else ((0, 0), (pad1, pad2), (0, 0)))
    # Add padding
    input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
    padded_h, padded_w, _ = input_img.shape
    # Resize and normalize
    input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
    # Channels-first
    input_img = np.transpose(input_img, (2, 0, 1))
    # As pytorch tensor
    input_img = torch.from_numpy(input_img).float()

    # ---------
    #  Label
    # ---------
    # Extract coordinates for unpadded + unscaled image
    x1 = w * (labels[:, 1] - labels[:, 3] / 2)
    y1 = h * (labels[:, 2] - labels[:, 4] / 2)
    x2 = w * (labels[:, 1] + labels[:, 3] / 2)
    y2 = h * (labels[:, 2] + labels[:, 4] / 2)
    # Adjust for added padding
    x1 += pad[1][0]
    y1 += pad[0][0]
    x2 += pad[1][0]
    y2 += pad[0][0]
    # Calculate ratios from coordinates
    labels[:, 1] = ((x1 + x2) / 2) / padded_w
    labels[:, 2] = ((y1 + y2) / 2) / padded_h
    labels[:, 3] *= w / padded_w
    labels[:, 4] *= h / padded_h

    # Fill a fixed-size matrix; rows beyond ``self.max_objects`` are dropped.
    filled_labels = np.zeros((self.max_objects, 5))
    n_kept = min(len(labels), self.max_objects)
    filled_labels[:n_kept] = labels[:n_kept]
    filled_labels = torch.from_numpy(filled_labels)

    return img_path, input_img, filled_labels
def __call__(self, in_data):
    """Augment one ``(img, bbox, label, mask)`` sample and encode it.

    There are five data augmentation steps, each applied to the image, the
    boxes and (when present) the mask:

    1. Color augmentation (image only)
    2. Random expansion
    3. Random cropping
    4. Resizing with random interpolation (nearest for the mask)
    5. Random horizontal flipping

    Args:
        in_data: Tuple ``(img, bbox, label, mask)``; ``mask`` may be
            ``None``.

    Returns:
        tuple: ``(img, mb_loc, mb_label, mask)``. ``mask`` is always an
        int32 array; when no mask was supplied it is a
        ``(self.size, self.size)`` array filled with ``-1``.

    Raises:
        RuntimeError: If the mask is empty after cropping.
    """
    img, bbox, label, mask = in_data

    # 1. Color augmentation
    img = random_distort(img)

    # 2. Random expansion
    if np.random.randint(2):
        img, param = transforms.random_expand(
            img, fill=self.mean, return_param=True)
        bbox = transforms.translate_bbox(
            bbox, y_offset=param['y_offset'], x_offset=param['x_offset'])
        if mask is not None:
            # The mask helper also needs the size of the expanded canvas.
            _, new_height, new_width = img.shape
            param['new_height'] = new_height
            param['new_width'] = new_width
            mask = self._random_expand_mask(mask, param)

    # 3. Random cropping
    img, param = random_crop_with_bbox_constraints(
        img, bbox, return_param=True)
    # Guarded like every other mask operation here, so a sample without a
    # mask is not handed to the crop helper as ``None``.
    if mask is not None:
        mask = self._fixed_crop_mask(mask, param['y_slice'], param['x_slice'])
    bbox, param = transforms.crop_bbox(
        bbox,
        y_slice=param['y_slice'],
        x_slice=param['x_slice'],
        allow_outside_center=False,
        return_param=True)
    label = label[param['index']]

    # 4. Resizing with random interpolation
    _, H, W = img.shape
    img = resize_with_random_interpolation(img, (self.size, self.size))
    if mask is not None:
        if mask.size == 0:
            raise RuntimeError("mask is empty after cropping")
        mask = self._resize_with_nearest(mask, (self.size, self.size))
    bbox = transforms.resize_bbox(bbox, (H, W), (self.size, self.size))

    # 5. Random horizontal flipping
    img, params = transforms.random_flip(
        img, x_random=True, return_param=True)
    bbox = transforms.flip_bbox(
        bbox, (self.size, self.size), x_flip=params['x_flip'])
    if mask is not None:
        mask = self._random_flip_mask(
            mask, x_flip=params['x_flip'], y_flip=params['y_flip'])

    # Preparation for SSD network
    img -= self.mean
    mb_loc, mb_label = self.coder.encode(bbox, label)

    if mask is None:
        # Placeholder for samples that carry no mask.
        mask = np.full((self.size, self.size), -1, dtype=np.int32)
    elif mask.dtype != np.int32:
        mask = mask.astype(np.int32)

    return img, mb_loc, mb_label, mask