def rand_crop(crop_img, crop_seg, mode=ModelPhase.TRAIN):
    """Randomly crop an image and its label map to the configured crop size.

    When the crop size exceeds the input size, the image is first padded on
    the bottom/right with ``cfg.DATASET.PADDING_VALUE`` and the label map with
    ``cfg.DATASET.IGNORE_INDEX``, then cropped. When both sizes already match,
    the inputs are returned untouched. When the crop size is smaller, a window
    at a random offset is cut out directly.

    Args:
        crop_img (numpy.ndarray): input image; indexed with three axes.
        crop_seg (numpy.ndarray): label map, or None when there is no label.
        mode (string): phase. In train phase ``cfg.TRAIN_CROP_SIZE`` is used;
            in every other phase ``cfg.EVAL_CROP_SIZE`` is used and must not
            be smaller than the input image.

    Returns:
        The cropped image and the cropped label map.

    Raises:
        Exception: outside the train phase, when the crop size is smaller
            than the image size.
    """
    src_h = crop_img.shape[0]
    src_w = crop_img.shape[1]

    # Crop sizes are stored as (width, height).
    if ModelPhase.is_train(mode):
        target_w = cfg.TRAIN_CROP_SIZE[0]
        target_h = cfg.TRAIN_CROP_SIZE[1]
    else:
        target_w = cfg.EVAL_CROP_SIZE[0]
        target_h = cfg.EVAL_CROP_SIZE[1]
        if target_h < src_h or target_w < src_w:
            raise Exception(
                "Crop size({},{}) must large than img size({},{}) when in EvalPhase."
                .format(target_w, target_h, src_w, src_h))

    # Nothing to do when the input already has the requested size.
    if src_h == target_h and src_w == target_w:
        return crop_img, crop_seg

    extra_h = max(target_h - src_h, 0)
    extra_w = max(target_w - src_w, 0)
    if extra_h > 0 or extra_w > 0:
        # Pad only at the bottom and right so existing pixels keep their
        # coordinates.
        crop_img = cv2.copyMakeBorder(
            crop_img,
            0,
            extra_h,
            0,
            extra_w,
            cv2.BORDER_CONSTANT,
            value=cfg.DATASET.PADDING_VALUE)
        if crop_seg is not None:
            crop_seg = cv2.copyMakeBorder(
                crop_seg,
                0,
                extra_h,
                0,
                extra_w,
                cv2.BORDER_CONSTANT,
                value=cfg.DATASET.IGNORE_INDEX)
        src_h = crop_img.shape[0]
        src_w = crop_img.shape[1]

    if target_h > 0 and target_w > 0:
        # Row offset is drawn before column offset; keep this order so a
        # seeded RNG yields the same crops.
        top = np.random.randint(src_h - target_h + 1)
        left = np.random.randint(src_w - target_w + 1)
        bottom = top + target_h
        right = left + target_w

        crop_img = crop_img[top:bottom, left:right, :]
        if crop_seg is not None:
            crop_seg = crop_seg[top:bottom, left:right]

    return crop_img, crop_seg
def process_image(self, line, data_dir, mode):
    """Load one sample and run the preprocessing pipeline for ``mode``.

    Train: resize, optional rich-crop augmentation (Gaussian blur, random
    rotation, random scale/aspect, HSV colour jitter), optional flip along
    axis 0, optional mirror along axis 1, then random crop.
    Eval: resize, then crop.
    Visual: resize, then crop, recording the image shape before and after
    the resize.

    Args:
        line: file-list entry, forwarded to ``self.load_image``.
        data_dir: dataset directory, forwarded to ``self.load_image``.
        mode: phase value checked against ``ModelPhase``.

    Returns:
        ``(img, grt, ignore)`` in train/eval, where ``grt`` is int32 with a
        new leading axis and ``ignore`` is an int32 array that is 1 wherever
        ``grt`` differs from ``cfg.DATASET.IGNORE_INDEX``;
        ``(img, grt, img_name, valid_shape, org_shape)`` in visual.

    Raises:
        ValueError: when ``mode`` is not a train, eval or visual phase.
    """
    img, grt, img_name, _grt_name = self.load_image(line, data_dir, mode=mode)

    if mode == ModelPhase.TRAIN:
        img, grt = aug.resize(img, grt, mode)

        rich = cfg.AUG.RICH_CROP
        if rich.ENABLE:
            if rich.BLUR:
                # Convert the blur ratio into a period: the blur fires when
                # a draw from [0, period) hits 0, i.e. once in `period`.
                blur_ratio = rich.BLUR_RATIO
                if blur_ratio <= 0:
                    blur_period = 0
                elif blur_ratio >= 1:
                    blur_period = 1
                else:
                    blur_period = int(1.0 / blur_ratio)

                if blur_period > 0 and np.random.randint(0, blur_period) == 0:
                    # Kernel size is bumped to the next odd value and capped
                    # at 9.
                    ksize = np.random.randint(3, 10)
                    if ksize % 2 != 1:
                        ksize += 1
                    if ksize > 9:
                        ksize = 9
                    img = cv2.GaussianBlur(img, (ksize, ksize), 0, 0)

            img, grt = aug.random_rotation(
                img,
                grt,
                rich_crop_max_rotation=rich.MAX_ROTATION,
                mean_value=cfg.DATASET.PADDING_VALUE)

            img, grt = aug.rand_scale_aspect(
                img,
                grt,
                rich_crop_min_scale=rich.MIN_AREA_RATIO,
                rich_crop_aspect_ratio=rich.ASPECT_RATIO)

            img = aug.hsv_color_jitter(
                img,
                brightness_jitter_ratio=rich.BRIGHTNESS_JITTER_RATIO,
                saturation_jitter_ratio=rich.SATURATION_JITTER_RATIO,
                contrast_jitter_ratio=rich.CONTRAST_JITTER_RATIO)

        if cfg.AUG.FLIP:
            flip_ratio = cfg.AUG.FLIP_RATIO
            if flip_ratio <= 0:
                flip_period = 0
            elif flip_ratio >= 1:
                flip_period = 1
            else:
                flip_period = int(1.0 / flip_ratio)

            if flip_period > 0 and np.random.randint(0, flip_period) == 0:
                # Reverse axis 0 of both image and label.
                img = img[::-1, :, :]
                grt = grt[::-1, :]

        if cfg.AUG.MIRROR and np.random.randint(0, 2) == 1:
            # Reverse axis 1 of both image and label.
            img = img[:, ::-1, :]
            grt = grt[:, ::-1]

        img, grt = aug.rand_crop(img, grt, mode=mode)

    elif ModelPhase.is_eval(mode):
        img, grt = aug.resize(img, grt, mode=mode)
        img, grt = aug.rand_crop(img, grt, mode=mode)

    elif ModelPhase.is_visual(mode):
        # Shapes before and after the resize are returned to the caller.
        org_shape = [img.shape[0], img.shape[1]]
        img, grt = aug.resize(img, grt, mode=mode)
        valid_shape = [img.shape[0], img.shape[1]]
        img, grt = aug.rand_crop(img, grt, mode=mode)

    else:
        raise ValueError("Dataset mode={} Error!".format(mode))

    # Normalize image
    img = self.normalize_image(img)

    if ModelPhase.is_train(mode) or ModelPhase.is_eval(mode):
        grt = np.expand_dims(np.array(grt).astype('int32'), axis=0)
        ignore = (grt != cfg.DATASET.IGNORE_INDEX).astype('int32')
        return (img, grt, ignore)

    if ModelPhase.is_visual(mode):
        return (img, grt, img_name, valid_shape, org_shape)
def process_image(self, line, data_dir, mode):
    """Load an image pair with labels and preprocess it for ``mode``.

    Train: resize, random crop, optional rich-crop augmentation (Gaussian
    blur, random rotation, random scale/aspect, HSV colour jitter), optional
    random multiple-of-90-degree rotation, optional flip along axis 0 and
    optional mirror along axis 1.
    Eval: resize, crop and, when ``cfg.TEST.TEST_AUG`` is set,
    ``self.test_aug`` on the images.
    Visual: resize and crop, recording the first image's shape before and
    after the resize.

    After normalisation the two label maps are merged into a single map
    when a second label map is present.

    Args:
        line: file-list entry, forwarded to ``self.load_image``.
        data_dir: dataset directory, forwarded to ``self.load_image``.
        mode: phase value checked against ``ModelPhase``.

    Returns:
        With ``cfg.DATASET.INPUT_IMAGE_NUM == 1``: ``(img1, grt, ignore)`` in
        train/eval, ``(img1, grt, img1_name, valid_shape, org_shape)`` in
        visual. Otherwise: ``(img1, img2, grt, ignore)`` in train/eval,
        ``(img1, img2, grt, img1_name, img2_name, valid_shape, org_shape)``
        in visual.

    Raises:
        ValueError: when ``mode`` is not a train, eval or visual phase.
    """
    (img1, img2, grt1, grt2, img1_name, img2_name, _grt1_name,
     _grt2_name) = self.load_image(line, data_dir, mode=mode)

    # The first label map is shifted up by one whenever it exists.
    if grt1 is not None:
        grt1 = grt1 + 1
    else:
        grt1 = None

    if mode == ModelPhase.TRAIN:
        img1, img2, grt1, grt2 = aug.resize(img1, img2, grt1, grt2, mode)
        img1, img2, grt1, grt2 = aug.rand_crop(
            img1, img2, grt1, grt2, mode=mode)

        rich = cfg.AUG.RICH_CROP
        if rich.ENABLE:
            if rich.BLUR:
                # Convert the blur ratio into a period: the blur fires when
                # a draw from [0, period) hits 0.
                blur_ratio = rich.BLUR_RATIO
                if blur_ratio <= 0:
                    blur_period = 0
                elif blur_ratio >= 1:
                    blur_period = 1
                else:
                    blur_period = int(1.0 / blur_ratio)

                if blur_period > 0 and np.random.randint(0, blur_period) == 0:
                    # Kernel size is bumped to the next odd value and capped
                    # at 9; both images share the same kernel.
                    ksize = np.random.randint(3, 10)
                    if ksize % 2 != 1:
                        ksize += 1
                    if ksize > 9:
                        ksize = 9
                    img1 = cv2.GaussianBlur(img1, (ksize, ksize), 0, 0)
                    if img2 is not None:
                        img2 = cv2.GaussianBlur(img2, (ksize, ksize), 0, 0)

            img1, img2, grt1, grt2 = aug.random_rotation(
                img1,
                img2,
                grt1,
                grt2,
                rich_crop_max_rotation=rich.MAX_ROTATION,
                mean_value=cfg.DATASET.PADDING_VALUE)

            img1, img2, grt1, grt2 = aug.rand_scale_aspect(
                img1,
                img2,
                grt1,
                grt2,
                rich_crop_min_scale=rich.MIN_AREA_RATIO,
                rich_crop_aspect_ratio=rich.ASPECT_RATIO)

            img1, img2 = aug.hsv_color_jitter(
                img1,
                img2,
                brightness_jitter_ratio=rich.BRIGHTNESS_JITTER_RATIO,
                saturation_jitter_ratio=rich.SATURATION_JITTER_RATIO,
                contrast_jitter_ratio=rich.CONTRAST_JITTER_RATIO)

        if cfg.AUG.RANDOM_ROTATION90:
            # One shared quarter-turn count keeps images and labels aligned.
            quarter_turns = np.random.randint(0, 4)
            img1 = np.rot90(img1, k=quarter_turns)
            if img2 is not None:
                img2 = np.rot90(img2, k=quarter_turns)
            grt1 = np.rot90(grt1, k=quarter_turns)
            if grt2 is not None:
                grt2 = np.rot90(grt2, k=quarter_turns)

        if cfg.AUG.FLIP:
            flip_ratio = cfg.AUG.FLIP_RATIO
            if flip_ratio <= 0:
                flip_period = 0
            elif flip_ratio >= 1:
                flip_period = 1
            else:
                flip_period = int(1.0 / flip_ratio)

            if flip_period > 0 and np.random.randint(0, flip_period) == 0:
                # Reverse axis 0 of every image and label that is present.
                img1 = img1[::-1, :, :]
                if img2 is not None:
                    img2 = img2[::-1, :, :]
                grt1 = grt1[::-1, :]
                if grt2 is not None:
                    grt2 = grt2[::-1, :]

        if cfg.AUG.MIRROR and np.random.randint(0, 2) == 1:
            # Reverse axis 1 of every image and label that is present.
            img1 = img1[:, ::-1, :]
            if img2 is not None:
                img2 = img2[:, ::-1, :]
            grt1 = grt1[:, ::-1]
            if grt2 is not None:
                grt2 = grt2[:, ::-1]

    elif ModelPhase.is_eval(mode):
        img1, img2, grt1, grt2 = aug.resize(img1, img2, grt1, grt2, mode=mode)
        img1, img2, grt1, grt2 = aug.rand_crop(
            img1, img2, grt1, grt2, mode=mode)
        if cfg.TEST.TEST_AUG:
            img1 = self.test_aug(img1)
            if img2 is not None:
                img2 = self.test_aug(img2)

    elif ModelPhase.is_visual(mode):
        # Shapes before and after the resize are returned to the caller.
        org_shape = [img1.shape[0], img1.shape[1]]
        img1, img2, grt1, grt2 = aug.resize(img1, img2, grt1, grt2, mode=mode)
        valid_shape = [img1.shape[0], img1.shape[1]]
        img1, img2, grt1, grt2 = aug.rand_crop(
            img1, img2, grt1, grt2, mode=mode)

    else:
        raise ValueError("Dataset mode={} Error!".format(mode))

    # Normalize image
    img1 = self.normalize_image(img1)
    if img2 is not None:
        img2 = self.normalize_image(img2)

    if grt2 is None:
        grt = grt1
    else:
        # Encode the (grt1, grt2) pair as one label; pixels whose two labels
        # are equal collapse to 0.
        grt = grt1 * cfg.DATASET.NUM_CLASSES + grt2
        grt[(grt1 - grt2) == 0] = 0
        if cfg.DATASET.NUM_CLASSES == 2:
            # With two classes, every remaining non-zero code becomes 1.
            grt[grt != 0] = 1
        # NOTE(review): grt1 was shifted by +1 at the top of this method, so
        # this compares the shifted values against IGNORE_INDEX - confirm
        # that is intended.
        ignored = ((grt1 == cfg.DATASET.IGNORE_INDEX) |
                   (grt2 == cfg.DATASET.IGNORE_INDEX))
        grt[ignored] = cfg.DATASET.IGNORE_INDEX

    single_input = cfg.DATASET.INPUT_IMAGE_NUM == 1

    if ModelPhase.is_train(mode) or ModelPhase.is_eval(mode):
        grt = np.expand_dims(np.array(grt).astype('int32'), axis=0)
        ignore = (grt != cfg.DATASET.IGNORE_INDEX).astype('int32')
        if single_input:
            return (img1, grt, ignore)
        return (img1, img2, grt, ignore)

    if ModelPhase.is_visual(mode):
        if single_input:
            return (img1, grt, img1_name, valid_shape, org_shape)
        return (img1, img2, grt, img1_name, img2_name, valid_shape,
                org_shape)
def load_image(self, line, src_dir, mode=ModelPhase.TRAIN):
    """Read the image (and label, when listed) named by one file-list entry.

    ``line`` is stripped and split on ``cfg.DATASET.SEPARATOR``. Exactly two
    fields are treated as ``image_name`` and ``label_name``; any other field
    count is an error in train/eval and otherwise means "image only".
    The image is read with ``cv2_imread`` and the label with ``pil_imread``,
    both relative to ``src_dir``. A 2-D image is converted to 3 channels.

    Args:
        line: one entry of the file list.
        src_dir: directory that image and label names are joined onto.
        mode: phase value; train/eval require a label, the visual phase
            tolerates a label whose size differs from the image.

    Returns:
        ``(img, grt, img_name, grt_name)``; ``grt`` and ``grt_name`` may be
        None when no label is available.

    Raises:
        Exception: malformed file-list entry in train/eval, unreadable
            image, image/label size mismatch outside the visual phase,
            missing label in train/eval, fewer than 3 channels, or a channel
            count that disagrees with ``cfg.DATASET.DATA_DIM``, ``cfg.MEAN``
            or ``cfg.STD``.
    """
    # original image cv2.imread flag setting
    cv2_imread_flag = cv2.IMREAD_COLOR
    if cfg.DATASET.IMAGE_TYPE == "rgba":
        # For 4-channel RGBA input, IMREAD_UNCHANGED preserves the alpha
        # channel.
        cv2_imread_flag = cv2.IMREAD_UNCHANGED

    parts = line.strip().split(cfg.DATASET.SEPARATOR)
    if len(parts) != 2:
        if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL:
            raise Exception("File list format incorrect! It should be"
                            " image_name{}label_name\\n".format(
                                cfg.DATASET.SEPARATOR))
        img_name, grt_name = parts[0], None
    else:
        img_name, grt_name = parts[0], parts[1]

    img_path = os.path.join(src_dir, img_name)
    img = cv2_imread(img_path, cv2_imread_flag)

    if grt_name is not None:
        grt_path = os.path.join(src_dir, grt_name)
        grt = pil_imread(grt_path)
    else:
        grt = None

    if img is None:
        raise Exception("Empty image, source image path: {}".format(img_path))

    img_height = img.shape[0]
    img_width = img.shape[1]

    if grt is not None:
        grt_height = grt.shape[0]
        grt_width = grt.shape[1]

        size_mismatch = img_height != grt_height or img_width != grt_width
        # A size mismatch is tolerated only in the visual phase.
        if size_mismatch and not ModelPhase.is_visual(mode):
            raise Exception(
                "Source img and label img must has the same size.")
    else:
        if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL:
            raise Exception(
                "No laber image path for image '{}' when training or evaluating. "
                .format(img_path))

    if len(img.shape) < 3:
        # 2-D (single channel) image: expand to 3 channels.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img_channels = img.shape[2]
    if img_channels < 3:
        raise Exception("PaddleSeg only supports gray, rgb or rgba image")
    if img_channels != cfg.DATASET.DATA_DIM:
        # Report the same DATA_DIM key that the check above reads; the
        # previous code formatted a misspelled DATADIM key, so this error
        # path failed on the config lookup instead of raising this message.
        raise Exception(
            "Input image channel({}) is not match cfg.DATASET.DATA_DIM({}), img_name={}"
            .format(img_channels, cfg.DATASET.DATA_DIM, img_name))
    if img_channels != len(cfg.MEAN):
        raise Exception(
            "Image name {}, image channels {} do not equal the length of cfg.MEAN {}."
            .format(img_name, img_channels, len(cfg.MEAN)))
    if img_channels != len(cfg.STD):
        raise Exception(
            "Image name {}, image channels {} do not equal the length of cfg.STD {}."
            .format(img_name, img_channels, len(cfg.STD)))

    return img, grt, img_name, grt_name