def __call__(self, image):
    image = [self.preprocess(c) for c in image]
    i, j, h, w = self.get_params(image[0], self.size)
    return [F.crop(c, i, j, h, w) for c in image]
def __call__(self, image):
    image = F.crop(image, self.top, self.left, self.height, self.width)
    return image
def fun(o):
    return F.crop(o, i, j, h, w)
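Nearly every snippet in this collection goes through torchvision.transforms.functional.crop, whose positional arguments are (img, top, left, height, width): the row offset comes first, then the column offset, not an (x, y) point. A minimal sketch of that convention (the 320x240 input size is illustrative; keyword names require torchvision >= 0.8):

import torchvision.transforms.functional as F
from PIL import Image

# crop(img, top, left, height, width): top/left are the (row, col) of the
# crop's upper-left corner.
img = Image.new("RGB", (320, 240))                       # PIL size is (width, height)
patch = F.crop(img, top=10, left=20, height=100, width=150)
assert patch.size == (150, 100)                          # again (width, height)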
def transform(image, label, logits=None, crop_size=(512, 512), scale_size=(0.8, 1.0), augmentation=True):
    # Random rescale image
    raw_w, raw_h = image.size
    scale_ratio = random.uniform(scale_size[0], scale_size[1])
    resized_size = (int(raw_h * scale_ratio), int(raw_w * scale_ratio))
    image = transforms_f.resize(image, resized_size, Image.BILINEAR)
    label = transforms_f.resize(label, resized_size, Image.NEAREST)
    if logits is not None:
        logits = transforms_f.resize(logits, resized_size, Image.NEAREST)

    # Add padding if rescaled image size is less than crop size
    if crop_size == -1:  # use original image size without crop or padding
        crop_size = (raw_h, raw_w)
    if crop_size[0] > resized_size[0] or crop_size[1] > resized_size[1]:
        right_pad = max(crop_size[1] - resized_size[1], 0)
        bottom_pad = max(crop_size[0] - resized_size[0], 0)
        image = transforms_f.pad(image, padding=(0, 0, right_pad, bottom_pad), padding_mode='reflect')
        label = transforms_f.pad(label, padding=(0, 0, right_pad, bottom_pad), fill=255, padding_mode='constant')
        if logits is not None:
            logits = transforms_f.pad(logits, padding=(0, 0, right_pad, bottom_pad), fill=0, padding_mode='constant')

    # Random cropping
    i, j, h, w = transforms.RandomCrop.get_params(image, output_size=crop_size)
    image = transforms_f.crop(image, i, j, h, w)
    label = transforms_f.crop(label, i, j, h, w)
    if logits is not None:
        logits = transforms_f.crop(logits, i, j, h, w)

    if augmentation:
        # Random color jitter
        if torch.rand(1) > 0.2:
            color_transform = transforms.ColorJitter.get_params(
                (0.75, 1.25), (0.75, 1.25), (0.75, 1.25), (-0.25, 0.25))
            image = color_transform(image)

        # Random Gaussian filter
        if torch.rand(1) > 0.5:
            sigma = random.uniform(0.15, 1.15)
            image = image.filter(ImageFilter.GaussianBlur(radius=sigma))

        # Random horizontal flipping
        if torch.rand(1) > 0.5:
            image = transforms_f.hflip(image)
            label = transforms_f.hflip(label)
            if logits is not None:
                logits = transforms_f.hflip(logits)

    # Transform to tensor
    image = transforms_f.to_tensor(image)
    label = (transforms_f.to_tensor(label) * 255).long()
    label[label == 255] = -1  # invalid pixels are re-mapped to index -1
    if logits is not None:
        logits = transforms_f.to_tensor(logits)

    # Apply (ImageNet) normalisation
    image = transforms_f.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if logits is not None:
        return image, label, logits
    else:
        return image, label
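A hedged usage sketch of the transform above, assuming its imports are in scope (transforms_f as torchvision.transforms.functional, transforms, torch, PIL); the file names are hypothetical:

from PIL import Image

# Any RGB image plus a single-channel label map works; pixel value 255 marks 'ignore'.
image = Image.open("example.jpg").convert("RGB")
label = Image.open("example_mask.png")

img_t, lbl_t = transform(image, label, crop_size=(512, 512),
                         scale_size=(0.8, 1.0), augmentation=True)
print(img_t.shape, lbl_t.shape)  # torch.Size([3, 512, 512]) torch.Size([1, 512, 512])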
def __call__(self, img):
    # Note: F.crop's signature is (img, top, left, height, width), so passing
    # x1/y1 first is only correct if they hold the vertical/horizontal offsets
    # in that (row, col) order.
    return F.crop(img, self.x1, self.y1, self.x2 - self.x1, self.y2 - self.y1)
def fit(source_image: Image.Image,
        target_size: Union[Tuple[int], int],
        fitting_mode="crop") -> Image.Image:
    """
    Args:
        source_image: PIL Image
        target_size: Tuple of ints (height, width) or single int for square target
        fitting_mode: Either 'crop' or 'pad'.

    Returns:
        (target_image, box_xmin, box_ymin, box_xmax, box_ymax); the box describes
        where the source content sits in the output (currently only filled in for
        'pad' mode).
    """
    source_width, source_height = source_image.size
    if isinstance(target_size, int):
        target_height, target_width = target_size, target_size
    elif isinstance(target_size, tuple) and len(target_size) == 2:
        target_height, target_width = target_size
    else:
        raise TypeError("invalid type of target_size")

    source_ratio = source_height / source_width
    target_ratio = target_height / target_width

    target_image = None
    box_xmin, box_ymin, box_xmax, box_ymax = None, None, None, None
    if fitting_mode == "crop":
        if source_ratio == target_ratio:  # simple resize
            target_image = resize(source_image, (target_height, target_width))
        elif source_ratio > target_ratio:  # align width, then crop
            overheight = int(source_height * (target_width / source_width))
            target_image = resize(source_image, (overheight, target_width))
            target_image = crop(target_image, int((overheight - target_height) / 2), 0,
                                target_height, target_width)
        elif source_ratio < target_ratio:  # align height, then crop
            overwidth = int(source_width * (target_height / source_height))
            target_image = resize(source_image, (target_height, overwidth))
            target_image = crop(target_image, 0, int((overwidth - target_width) / 2),
                                target_height, target_width)
        # TODO: Implement crop box info if wanted
    elif fitting_mode == "pad":
        if source_ratio == target_ratio:  # simple resize
            target_image = resize(source_image, (target_height, target_width))
            box_xmin, box_ymin = 0, 0
            box_xmax, box_ymax = target_width - 1, target_height - 1
        elif source_ratio > target_ratio:  # align height, then pad
            underwidth = int(source_width * (target_height / source_height))
            target_image = resize(source_image, (target_height, underwidth))
            target_image = pad(
                target_image,
                # add 1 when (target_width - underwidth) is odd, to ensure the output has the desired size
                padding=(  # left, top, right, bottom
                    int((target_width - underwidth) / 2), 0,
                    int((target_width - underwidth) / 2) + (target_width - underwidth) % 2, 0))
            box_xmin, box_ymin = int((target_width - underwidth) / 2), 0
            box_xmax = int((target_width - underwidth) / 2) + underwidth - 1
            box_ymax = target_height - 1
        elif source_ratio < target_ratio:  # align width, then pad
            underheight = int(source_height * (target_width / source_width))
            target_image = resize(source_image, (underheight, target_width))
            target_image = pad(
                target_image,
                # add 1 when (target_height - underheight) is odd, to ensure the output has the desired size
                padding=(  # left, top, right, bottom
                    0, int((target_height - underheight) / 2),
                    0, int((target_height - underheight) / 2) + (target_height - underheight) % 2))
            box_xmin, box_ymin = 0, int((target_height - underheight) / 2)
            box_xmax = target_width - 1
            box_ymax = int((target_height - underheight) / 2) + underheight - 1

    assert target_image.size[0] == target_width
    assert target_image.size[1] == target_height
    return target_image, box_xmin, box_ymin, box_xmax, box_ymax
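A usage sketch for fit, assuming resize/crop/pad are the torchvision.transforms.functional helpers imported by the snippet; the 640x480 source is illustrative:

from PIL import Image

src = Image.new("RGB", (640, 480))  # PIL size is (width, height)
fitted, x0, y0, x1, y1 = fit(src, (256, 256), fitting_mode="pad")
assert fitted.size == (256, 256)
# In 'pad' mode the box reports where the resized source landed; here the
# source is wider than the target, so it is padded top and bottom.
print(x0, y0, x1, y1)  # 0 32 255 223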
def __getitem__(self, index):
    sequence_path = self.all_sequence_paths[index]
    df = pd.read_csv(sequence_path, header=None, index_col=False,
                     names=['path', 'xmin', 'ymin', 'xmax', 'ymax', 'gazex', 'gazey'])
    show_name = sequence_path.split('/')[-3]
    clip = sequence_path.split('/')[-2]
    seq_len = len(df.index)

    # moving-avg smoothing
    window_size = 11  # should be an odd number
    df['xmin'] = myutils.smooth_by_conv(window_size, df, 'xmin')
    df['ymin'] = myutils.smooth_by_conv(window_size, df, 'ymin')
    df['xmax'] = myutils.smooth_by_conv(window_size, df, 'xmax')
    df['ymax'] = myutils.smooth_by_conv(window_size, df, 'ymax')

    if not self.test:
        # conditions for data augmentation
        cond_jitter = np.random.random_sample()
        cond_flip = np.random.random_sample()
        cond_color = np.random.random_sample()
        if cond_color < 0.5:
            n1 = np.random.uniform(0.5, 1.5)
            n2 = np.random.uniform(0.5, 1.5)
            n3 = np.random.uniform(0.5, 1.5)
        cond_crop = np.random.random_sample()

        # if longer than seq_len_limit, cut it down to the limit with the init index randomly sampled
        if seq_len > self.seq_len_limit:
            sampled_ind = np.random.randint(0, seq_len - self.seq_len_limit)
            seq_len = self.seq_len_limit
        else:
            sampled_ind = 0

        if cond_crop < 0.5:
            sliced_x_min = df['xmin'].iloc[sampled_ind:sampled_ind + seq_len]
            sliced_x_max = df['xmax'].iloc[sampled_ind:sampled_ind + seq_len]
            sliced_y_min = df['ymin'].iloc[sampled_ind:sampled_ind + seq_len]
            sliced_y_max = df['ymax'].iloc[sampled_ind:sampled_ind + seq_len]

            sliced_gaze_x = df['gazex'].iloc[sampled_ind:sampled_ind + seq_len]
            sliced_gaze_y = df['gazey'].iloc[sampled_ind:sampled_ind + seq_len]

            check_sum = sliced_gaze_x.sum() + sliced_gaze_y.sum()
            all_outside = check_sum == -2 * seq_len

            # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
            if all_outside:
                crop_x_min = np.min([sliced_x_min.min(), sliced_x_max.min()])
                crop_y_min = np.min([sliced_y_min.min(), sliced_y_max.min()])
                crop_x_max = np.max([sliced_x_min.max(), sliced_x_max.max()])
                crop_y_max = np.max([sliced_y_min.max(), sliced_y_max.max()])
            else:
                crop_x_min = np.min([sliced_gaze_x.min(), sliced_x_min.min(), sliced_x_max.min()])
                crop_y_min = np.min([sliced_gaze_y.min(), sliced_y_min.min(), sliced_y_max.min()])
                crop_x_max = np.max([sliced_gaze_x.max(), sliced_x_min.max(), sliced_x_max.max()])
                crop_y_max = np.max([sliced_gaze_y.max(), sliced_y_min.max(), sliced_y_max.max()])

            # Randomly select a top left corner
            if crop_x_min >= 0:
                crop_x_min = np.random.uniform(0, crop_x_min)
            if crop_y_min >= 0:
                crop_y_min = np.random.uniform(0, crop_y_min)

            # Get image size
            path = os.path.join(self.data_dir, show_name, clip, df['path'].iloc[0])
            img = Image.open(path)
            img = img.convert('RGB')
            width, height = img.size

            # Find the range of valid crop width and height starting from (crop_x_min, crop_y_min)
            crop_width_min = crop_x_max - crop_x_min
            crop_height_min = crop_y_max - crop_y_min
            crop_width_max = width - crop_x_min
            crop_height_max = height - crop_y_min
            # Randomly select a width and a height
            crop_width = np.random.uniform(crop_width_min, crop_width_max)
            crop_height = np.random.uniform(crop_height_min, crop_height_max)
    else:
        sampled_ind = 0

    faces, images, head_channels, heatmaps, paths, gazes, imsizes, gaze_inouts = [], [], [], [], [], [], [], []
    index_tracker = -1
    for i, row in df.iterrows():
        index_tracker = index_tracker + 1
        if not self.test:
            if index_tracker < sampled_ind or index_tracker >= (sampled_ind + self.seq_len_limit):
                continue

        face_x1 = row['xmin']  # note: already in image coordinates
        face_y1 = row['ymin']  # note: already in image coordinates
        face_x2 = row['xmax']  # note: already in image coordinates
        face_y2 = row['ymax']  # note: already in image coordinates
        gaze_x = row['gazex']  # note: already in image coordinates
        gaze_y = row['gazey']  # note: already in image coordinates

        impath = os.path.join(self.data_dir, show_name, clip, row['path'])
        img = Image.open(impath)
        img = img.convert('RGB')
        width, height = img.size
        imsize = torch.FloatTensor([width, height])
        # imsizes.append(imsize)

        face_x1, face_y1, face_x2, face_y2 = map(float, [face_x1, face_y1, face_x2, face_y2])
        gaze_x, gaze_y = map(float, [gaze_x, gaze_y])
        if gaze_x == -1 and gaze_y == -1:
            gaze_inside = False
        else:
            if gaze_x < 0:  # move a gaze point that was slightly outside the image back in
                gaze_x = 0
            if gaze_y < 0:
                gaze_y = 0
            gaze_inside = True

        if not self.test:
            ## data augmentation
            # Jitter (expansion-only) bounding box size.
            if cond_jitter < 0.5:
                k = cond_jitter * 0.1
                face_x1 -= k * abs(face_x2 - face_x1)
                face_y1 -= k * abs(face_y2 - face_y1)
                face_x2 += k * abs(face_x2 - face_x1)
                face_y2 += k * abs(face_y2 - face_y1)
                face_x1 = np.clip(face_x1, 0, width)
                face_x2 = np.clip(face_x2, 0, width)
                face_y1 = np.clip(face_y1, 0, height)
                face_y2 = np.clip(face_y2, 0, height)

            # Random crop
            if cond_crop < 0.5:
                # Crop it
                img = TF.crop(img, crop_y_min, crop_x_min, crop_height, crop_width)

                # Record the crop's (x, y) offset
                offset_x, offset_y = crop_x_min, crop_y_min

                # Convert coordinates into the cropped frame
                face_x1, face_y1, face_x2, face_y2 = \
                    face_x1 - offset_x, face_y1 - offset_y, face_x2 - offset_x, face_y2 - offset_y
                if gaze_inside:
                    gaze_x, gaze_y = (gaze_x - offset_x), (gaze_y - offset_y)
                else:
                    gaze_x = -1
                    gaze_y = -1

                width, height = crop_width, crop_height

            # Flip?
            if cond_flip < 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                x_max_2 = width - face_x1
                x_min_2 = width - face_x2
                face_x2 = x_max_2
                face_x1 = x_min_2
                if gaze_x != -1 and gaze_y != -1:
                    gaze_x = width - gaze_x

            # Random color change
            if cond_color < 0.5:
                img = TF.adjust_brightness(img, brightness_factor=n1)
                img = TF.adjust_contrast(img, contrast_factor=n2)
                img = TF.adjust_saturation(img, saturation_factor=n3)

        # Face crop
        face = img.copy().crop((int(face_x1), int(face_y1), int(face_x2), int(face_y2)))

        # Head channel image
        head_channel = imutils.get_head_box_channel(
            face_x1, face_y1, face_x2, face_y2, width, height,
            resolution=self.input_size, coordconv=False).unsqueeze(0)

        if self.transform is not None:
            img = self.transform(img)
            face = self.transform(face)

        # Deconv output
        if gaze_inside:
            gaze_x /= float(width)  # fractional gaze
            gaze_y /= float(height)
            gaze_heatmap = torch.zeros(self.output_size, self.output_size)  # set the size of the output
            gaze_map = imutils.draw_labelmap(
                gaze_heatmap,
                [gaze_x * self.output_size, gaze_y * self.output_size],
                3, type='Gaussian')
            gazes.append(torch.FloatTensor([gaze_x, gaze_y]))
        else:
            gaze_map = torch.zeros(self.output_size, self.output_size)
            gazes.append(torch.FloatTensor([-1, -1]))

        faces.append(face)
        images.append(img)
        head_channels.append(head_channel)
        heatmaps.append(gaze_map)
        gaze_inouts.append(torch.FloatTensor([int(gaze_inside)]))

    if self.imshow:
        for i in range(len(faces)):
            fig = plt.figure(111)
            img = 255 - imutils.unnorm(images[i].numpy()) * 255
            img = np.clip(img, 0, 255)
            plt.imshow(np.transpose(img, (1, 2, 0)))
            plt.imshow(imresize(heatmaps[i], (self.input_size, self.input_size)),
                       cmap='jet', alpha=0.3)
            plt.imshow(imresize(1 - head_channels[i].squeeze(0), (self.input_size, self.input_size)),
                       alpha=0.2)
            plt.savefig(os.path.join('debug', 'viz_%d_inout=%d.png' % (i, gaze_inouts[i])))
            plt.close('all')

    faces = torch.stack(faces)
    images = torch.stack(images)
    head_channels = torch.stack(head_channels)
    heatmaps = torch.stack(heatmaps)
    gazes = torch.stack(gazes)
    gaze_inouts = torch.stack(gaze_inouts)
    # imsizes = torch.stack(imsizes)
    # print(faces.shape, images.shape, head_channels.shape, heatmaps.shape)

    if self.test:
        return images, faces, head_channels, heatmaps, gazes, gaze_inouts
    else:  # train
        return images, faces, head_channels, heatmaps, gaze_inouts
def process_one(self, img):
    i, j, h, w = self.get_params(img, self.size)
    out = F.crop(img, i, j, h, w)
    if random.random() < 0.5:
        out = F.hflip(out)
    return self.transformer(out)
def bbox_augmentations(
        self,
        img,
        bboxes: List[List[int]],
        labels: List[int],
        size=(256, 256),
        scale=(0.08, 1.0),
        ratio=(0.75, 4 / 3),
        interpolation=Image.BILINEAR,
):
    """
    Arguments:
        img: PIL Image
        bboxes: list of bounding boxes [[left, top, right, bot], ...]
        size: image size to convert to
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
    """
    # Note: get_params actually returns (top, left, height, width), so the
    # names `bot` and `right` below really hold the crop height and width.
    top, left, bot, right = torchvision.transforms.RandomResizedCrop.get_params(img, scale, ratio)
    print("PARAMS =", (top, left, bot, right))
    width, height = img.size

    # Crop then resize; kept as two calls to make the operations explicit
    img = F.crop(img, top, left, bot, right)
    img = F.resize(img, size=size, interpolation=interpolation)
    # equivalent one-line expression:
    # img = F.resized_crop(img, top, left, bot, right, size=size, interpolation=interpolation)

    final_boxlist = []
    final_labels = []
    # Assumes the box list is [[left, top, right, bot], ...]
    for box, label in zip(bboxes, labels):
        boxleft, boxtop, boxright, boxbot = box

        # remove boxes that fall entirely outside the crop
        if ((left >= boxright) or (top >= boxbot)
                or ((top + bot + 1) <= boxtop)
                or ((left + right + 1) <= boxleft)):
            continue

        # cropping
        if top > boxtop:
            boxtop = 0
        else:
            boxtop -= top
        if left > boxleft:
            boxleft = 0
        else:
            boxleft -= left
        if (top + bot) <= boxbot:
            boxbot = bot
        else:
            boxbot -= top
        if (left + right) <= boxright:
            boxright = right
        else:
            boxright -= left

        # resizing, matching the behavior of functional.resize
        boxtop, boxleft, boxbot, boxright = _resize_box(
            size, (bot, right), (boxtop, boxleft, boxbot, boxright))

        # drop zero-area boxes
        if ((boxtop - boxbot) * (boxright - boxleft)) == 0:
            continue

        # Point ordering should match
        # https://pytorch.org/docs/stable/_modules/torchvision/models/detection/faster_rcnn.html#fasterrcnn_resnet50_fpn
        final_boxlist.append([boxleft, boxtop, boxright, boxbot])
        final_labels.append(label)

    return img, final_boxlist, final_labels
def augment_image(img, mask, box, net_input_shape, random_scale, random_displacement, random_flip):
    """
    img: numpy array of (height, width, 3)
    mask: numpy array of (height, width, 1)
    box: an [x, y, w, h] box at the center
    net_input_shape: input height and width of network (height, width)
    """
    if len(img.shape) != 3 or img.shape[2] != 3:
        raise ValueError('Expecting image shape to be [H,W,3].')
    if not (len(mask.shape) == 3 and mask.shape[2] == 1):
        raise ValueError('Expecting mask shape to be [H,W,1].')

    img_height = img.shape[0]
    img_width = img.shape[1]
    net_height = net_input_shape[0]
    net_width = net_input_shape[1]

    # random scale
    if random_scale is not None:
        scale_factor = np.random.uniform(random_scale[0], random_scale[1])
    else:
        scale_factor = 1.0
    scaled_height = int(net_height * scale_factor)
    scaled_width = int(net_width * scale_factor)

    # randomly displace a little
    if random_displacement is not None:
        displacement_x = int(np.random.uniform(-random_displacement, random_displacement) * scaled_width)
        displacement_y = int(np.random.uniform(-random_displacement, random_displacement) * scaled_height)
    else:
        displacement_x = 0
        displacement_y = 0

    x, y, w, h = box
    x_center, y_center = x + w / 2, y + h / 2
    crop_box = box_utils.int_box([
        x_center - net_width / 2, y_center - net_height / 2, net_width, net_height
    ])
    crop_box = box_utils.int_box(box_utils.rescale_box(crop_box, scale_factor))
    crop_box = box_utils.shift_box(crop_box, displacement_x, displacement_y, img_width, img_height)

    # randomly decide horizontal / vertical flips
    random_hflip = random.randrange(2)
    random_vflip = random.randrange(2)

    # Begin transforms
    # numpy to Torch tensor
    img_aug = torch.from_numpy(img)
    mask_aug = torch.from_numpy(mask)
    # (H,W,C) -> (C,H,W)
    img_aug = img_aug.permute(2, 0, 1)
    mask_aug = mask_aug.permute(2, 0, 1)

    # Crop out the randomly scaled / displaced box
    x, y, w, h = crop_box
    img_aug = ttf.crop(img_aug, top=y, left=x, height=h, width=w)
    mask_aug = ttf.crop(mask_aug, top=y, left=x, height=h, width=w)

    # Resize to network size
    img_aug = ttf.resize(img_aug, [net_height, net_width])
    mask_aug = ttf.resize(mask_aug, [net_height, net_width])

    # Flip if needed
    if random_flip:
        if random_hflip > 0:
            img_aug = ttf.hflip(img_aug)
            mask_aug = ttf.hflip(mask_aug)
        if random_vflip > 0:
            img_aug = ttf.vflip(img_aug)
            mask_aug = ttf.vflip(mask_aug)

    return img_aug, mask_aug
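A hypothetical call to augment_image; the shapes, box, and ranges are placeholders matching the docstring, and the snippet's own box_utils helpers and ttf alias (torchvision.transforms.functional) are assumed importable:

import numpy as np

# Hypothetical inputs: a 480x640 image, a binary mask, and a target box.
img = np.zeros((480, 640, 3), dtype=np.float32)
mask = np.zeros((480, 640, 1), dtype=np.float32)
box = [200, 100, 128, 96]  # [x, y, w, h]

img_aug, mask_aug = augment_image(
    img, mask, box,
    net_input_shape=(224, 224),
    random_scale=(0.8, 1.2),    # uniform scale jitter
    random_displacement=0.1,    # +/-10% of the scaled crop size
    random_flip=True)
print(img_aug.shape, mask_aug.shape)  # torch.Size([3, 224, 224]), torch.Size([1, 224, 224])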
def __getitem__(self, index): """ Return one sample and its label and extra information that we need later. :param index: int, the index of the sample within the whole dataset. :return: sample: pytorch.tensor of size (1, C, H, W) and datatype torch.FloatTensor. Where C is the number of color channels (=3), and H is the height of the patch, and W is its width. mask: PIL.Image.Image, the mask of the regions of interest. label: int, the label of the sample. """ # Force seeding: a workaround to deal with reproducibility when suing different number of workers if want to # preserve the reproducibility. Each sample has its won seed. reproducibility.force_seed(self.seeds[index]) if self.set_for_eval: error_msg = "Something wrong. You didn't ask to set the data ready for evaluation, but here we are " \ ".... [NOT OK]" assert self.inputs_ready is not None and self.labels_ready is not None, error_msg img = self.inputs_ready[index] mask = self.masks_ready[index] target = self.labels_ready[index] return img, mask, target if self.do_not_save_samples: img, mask, target = self.load_sample_i(index) else: assert self.preloaded, "Sorry, you need to preload the data first .... [NOT OK]" img, mask, target = self.images[index], self.masks[ index], self.labels[index] # Upscale on the fly. Sorry, this may add an extra time, but, we do not want to save in memory upscaled # images!!!! it takes a lot of space, especially for large datasets. So, compromise? upscale only when # necessary. # check if we need to upscale the image. Useful for Caltech-UCSD-Birds-200-2011. if self.up_scale_small_dim_to is not None: w, h = img.size w_up, h_up = self.get_upscaled_dims(w, h, self.up_scale_small_dim_to) img = img.resize((w_up, h_up), resample=PIL.Image.BILINEAR) # Upscale the image: only for Caltech-UCSD-Birds-200-2011. if self.randomCropper: # training only. Do not crop for evaluation. # Padding. if self.padding_size: w, h = img.size ph, pw = self.padding_size padding = (int(pw * w), int(ph * h)) img = TF.pad(img, padding=padding, padding_mode=self.padding_mode) mask = TF.pad( mask, padding=padding, padding_mode=self.padding_mode) # just for tracking. img, (i, j, h, w) = self.randomCropper(img) # print("Dadaloader Index {} i {} j {} seed {}".format(index, i, j, self.seeds[index])) # crop the mask mask = TF.crop( mask, i, j, h, w) # just for tracking. Not used for actual training. # Pad the image to be div. by 32 in both sides. if self.force_div_32: w, h = img.size pad_left, pad_right = self.get_padding(w, 32) pad_top, pad_bottom = self.get_padding(h, 32) padding = (pad_left, pad_top, pad_right, pad_bottom) img = TF.pad(img, padding=padding, padding_mode="reflect") # This is not necessary in training nor in test. It may be necessary during training if your patch size # is not dividable by 32 and you want to make it dividable by 32. # We are going to comment this. # if not self.set_for_eval_backup: # we want to keep the mask intact for evaluation. # just for tracking. Not used for training. # mask = TF.pad(mask, padding=padding, padding_mode="reflect") if self.transform_img: # just for training: do not transform the mask (since it is not used). img = self.transform_img(img) if self.transform_tensor: # just for training: do not transform the mask (since it is not used). img = self.transform_tensor(img) # Prepare the mask to be used on GPU to compute Dice index. mask = np.array(mask, dtype=np.float32) / 255. # full of 0 and 1. mask = self.to_tensor(np.expand_dims( mask, axis=-1)) # mak the mask with shape (h, w, 1). 
return img, mask, target
def __getitem__(self, index):
    sample_path = self.data_list[index].split()
    img = Image.open(os.path.join(self.dir_imgs, sample_path[self.idx_img])).convert("RGB")
    lidar = None
    depth = None
    item = []
    if self.mode == 'train':
        depth = read_depth(os.path.join(self.dir_imgs, sample_path[self.idx_depth]),
                           self.dataset_name, self.max_depth)
        if self.lidar_exist:
            lidar = read_depth(os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                               self.dataset_name, self.max_depth)
        else:
            if self.gen_sparse_online:
                lidar = self.to_sparse(image=depth)
            else:
                lidar = self.lidar_persudo
        # show(depth), show(lidar), show(img)

        # Augmentation
        rsz_size = img.size[::-1] if self.resize_size is None else self.resize_size  # h*w
        crp_size = img.size[::-1] if self.crop_size is None else self.crop_size  # h*w
        depth_rsz = transforms.Compose(
            [transforms.ToPILImage(), transforms.Resize(rsz_size, 0)])  # nearest-neighbour, no interpolation
        img = transforms.Resize(rsz_size)(img)  # Resize defaults to bilinear interpolation
        depth = depth_rsz(depth)
        lidar = depth_rsz(lidar)
        # The top of KITTI frames has no depth values, so crop it off first
        if self.dataset_name == 'kitti':
            img = F.crop(img, rsz_size[0] - crp_size[0], 0, crp_size[0], rsz_size[1])
            depth = F.crop(depth, rsz_size[0] - crp_size[0], 0, crp_size[0], rsz_size[1])
            lidar = F.crop(lidar, rsz_size[0] - crp_size[0], 0, crp_size[0], rsz_size[1])
        img = np.asarray(img, dtype=np.float32) / 255.0
        depth = np.asarray(depth)
        lidar = np.asarray(lidar)
        # li = cv2.resize(lidar.astype(np.uint16), rsz_size[::-1], 0)  # OpenCV's resize inflates the ratio of sparse points
        if self.aug:
            img, depth, lidar = self.augment_3(img, depth, lidar, crp_size, self.degree)
        # Normalisation
        img = self.img_process(img.copy())
        depth = self.to_tensor(depth.copy())
        lidar = self.to_tensor(lidar.copy())
        item = [img, lidar, depth]
    elif self.mode == 'val':
        depth = read_depth(os.path.join(self.dir_imgs, sample_path[self.idx_depth]),
                           self.dataset_name, self.max_depth)
        if self.lidar_exist:
            lidar = read_depth(os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                               self.dataset_name, self.max_depth)
        else:
            if self.gen_sparse_online:
                lidar = self.to_sparse(image=depth)
            else:
                lidar = self.lidar_persudo
        img = np.asarray(img, dtype=np.float32) / 255.0
        # Pad to a size the network can accept
        h_ori, w_ori = img.shape[0], img.shape[1]
        h_pad = int(np.ceil(h_ori / self.mul_times) * self.mul_times)
        w_pad = int(np.ceil(w_ori / self.mul_times) * self.mul_times)
        img = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=img)
        lidar = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=lidar)
        depth = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=depth)
        lidar = lidar.astype(np.float32)
        depth = depth.astype(np.float32)
        # Normalisation
        img = self.img_process(img.copy())
        depth = self.to_tensor(depth.copy())
        lidar = self.to_tensor(lidar.copy())
        item = [img, lidar, depth]
    elif self.mode == 'test':
        if self.lidar_exist:
            lidar = read_depth(os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                               self.dataset_name, self.max_depth)
        else:
            lidar = self.lidar_persudo
        img = np.asarray(img, dtype=np.float32) / 255.0
        # Pad to a size the network can accept
        h_ori, w_ori = img.shape[0], img.shape[1]
        h_pad = int(np.ceil(h_ori / self.mul_times) * self.mul_times)
        w_pad = int(np.ceil(w_ori / self.mul_times) * self.mul_times)
        img = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=img)
        lidar = iaa.CenterPadToFixedSize(height=h_pad, width=w_pad)(image=lidar)
        lidar = lidar.astype(np.float32)
        # Normalisation
        img = self.img_process(img.copy())
        lidar = self.to_tensor(lidar.copy())
        item = [img, lidar, lidar]
    return item
def __getitem__(self, idx):
    """
    Function to get a sample from the dataset. First both RGB and semantic images are read in
    PIL format. Then transformations are applied from PIL to Numpy arrays to Tensors.

    For regular usage:
        - Images should be output with dimensions (3, W, H)
        - Semantic images should be output with dimensions (1, W, H)

    In the case that 10-crops are used:
        - Images should be output with dimensions (10, 3, W, H)
        - Semantic images should be output with dimensions (10, 1, W, H)

    :param idx: Index
    :return: Dictionary containing {RGB image, semantic segmentation mask, scene category index}
    """
    # Get RGB image path and load it
    img_name = os.path.join(self.image_dir, self.set, self.labels[idx], self.filenames[idx])
    img = Image.open(img_name)

    # Convert it to RGB if gray-scale
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Load semantic segmentation mask
    filename_sem = self.filenames[idx][0:self.filenames[idx].find('.jpg')]
    sem_name = os.path.join(self.image_dir, "noisy_annotations_RGB", self.set,
                            self.labels[idx], (filename_sem + ".png"))
    sem = Image.open(sem_name)

    # Load semantic segmentation scores
    filename_scores = self.filenames[idx][0:self.filenames[idx].find('.jpg')]
    sem_score_name = os.path.join(self.image_dir, "noisy_scores_RGB", self.set,
                                  self.labels[idx], (filename_scores + ".png"))
    semScore = Image.open(sem_score_name)

    # Apply transformations depending on the set (train, val)
    if self.set == "train":
        # Define random crop. If the image is smaller, resize first.
        bilinearResize_trans = transforms.Resize(self.resizeSize)
        nearestResize_trans = transforms.Resize(self.resizeSize, interpolation=Image.NEAREST)

        img = bilinearResize_trans(img)
        sem = nearestResize_trans(sem)
        semScore = bilinearResize_trans(semScore)

        # Extract random crop parameters
        i, j, h, w = transforms.RandomCrop.get_params(img, output_size=(self.outputSize, self.outputSize))

        # Apply random crop parameters
        img = TF.crop(img, i, j, h, w)
        sem = TF.crop(sem, i, j, h, w)
        semScore = TF.crop(semScore, i, j, h, w)

        # Random horizontal flipping
        if random.random() > 0.5:
            img = TF.hflip(img)
            sem = TF.hflip(sem)
            semScore = TF.hflip(semScore)

        # Apply transformations from the imgaug library
        img = np.asarray(img)
        sem = np.asarray(sem)
        semScore = np.asarray(semScore)
        img = np.squeeze(self.seq.augment_images(np.expand_dims(img, axis=0)))
        if self.SemRGB:
            sem = np.squeeze(self.seq_sem.augment_images(np.expand_dims(sem, 0)))
            semScore = np.squeeze(self.seq_sem.augment_images(np.expand_dims(semScore, 0)))
        else:
            sem = np.squeeze(self.seq_sem.augment_images(np.expand_dims(np.expand_dims(sem, 0), 3)))
            semScore = np.squeeze(self.seq_sem.augment_images(np.expand_dims(np.expand_dims(semScore, 0), 3)))

        # Apply non-random transforms: to tensor and normalization for RGB; to tensor for
        # semantic segmentation.
        img = self.train_transforms_img(img)
        sem = self.train_transforms_sem(sem)
        semScore = self.train_transforms_scores(semScore)
    else:
        img = self.val_transforms_img(img)
        sem = self.val_transforms_sem(sem)
        semScore = self.val_transforms_scores(semScore)

    # Final control statements
    if not self.TenCrop:
        if not self.SemRGB:
            assert img.shape[0] == 3 and img.shape[1] == self.outputSize and img.shape[2] == self.outputSize
            assert sem.shape[0] == 1 and sem.shape[1] == self.outputSize and sem.shape[2] == self.outputSize
            assert semScore.shape[0] == 1 and semScore.shape[1] == self.outputSize and semScore.shape[2] == self.outputSize
        else:
            assert img.shape[0] == 3 and img.shape[1] == self.outputSize and img.shape[2] == self.outputSize
            assert sem.shape[0] == 3 and sem.shape[1] == self.outputSize and sem.shape[2] == self.outputSize
            assert semScore.shape[0] == 3 and semScore.shape[1] == self.outputSize and semScore.shape[2] == self.outputSize
    else:
        if not self.SemRGB:
            assert img.shape[0] == 10 and img.shape[2] == self.outputSize and img.shape[3] == self.outputSize
            assert sem.shape[0] == 10 and sem.shape[2] == self.outputSize and sem.shape[3] == self.outputSize
            assert semScore.shape[0] == 10 and semScore.shape[2] == self.outputSize and semScore.shape[3] == self.outputSize
        else:
            assert img.shape[0] == 10 and img.shape[2] == self.outputSize and img.shape[3] == self.outputSize
            assert sem.shape[0] == 10 and sem.shape[2] == self.outputSize and sem.shape[3] == self.outputSize
            assert semScore.shape[0] == 10 and semScore.shape[2] == self.outputSize and semScore.shape[3] == self.outputSize

    # Create dictionary
    self.sample = {
        'Image': img,
        'Semantic': sem,
        'Semantic Scores': semScore,
        'Scene Index': self.classes.index(self.labels[idx])
    }
    return self.sample
def torchvision_transform(self, img):
    img = torchvision.crop(img, top=0, left=0, height=64, width=64)
    return torchvision.resize(img, (512, 512))
def __getitem__(self, index):
    if self.test:
        g = self.X_test.get_group(self.keys[index])
        cont_gaze = []
        for i, row in g.iterrows():
            path = row['path']
            x_min = row['bbox_x_min']
            y_min = row['bbox_y_min']
            x_max = row['bbox_x_max']
            y_max = row['bbox_y_max']
            eye_x = row['eye_x']
            eye_y = row['eye_y']
            gaze_x = row['gaze_x']
            gaze_y = row['gaze_y']
            cont_gaze.append([gaze_x, gaze_y])  # all ground truth gaze are stacked up
        for j in range(len(cont_gaze), 20):
            cont_gaze.append([-1, -1])  # pad dummy gaze to match size for batch processing
        cont_gaze = torch.FloatTensor(cont_gaze)
        gaze_inside = True  # always consider test samples as inside
    else:
        path = self.X_train.iloc[index]
        eye_x, eye_y, gaze_x, gaze_y = self.y_train.iloc[index]
        gaze_inside = True  # bool(inout)

    img = Image.open(os.path.join(self.data_dir, path))
    img = img.convert('RGB')
    width, height = img.size
    # print('gaze coords: ', type(gaze_x), type(gaze_y), gaze_x, gaze_y)
    # print('eye coords: ', type(eye_x), type(eye_y), eye_x, eye_y)

    # expand face bbox a bit
    k = 0.1
    x_min = (eye_x - 0.15) * width
    y_min = (eye_y - 0.15) * height
    x_max = (eye_x + 0.15) * width
    y_max = (eye_y + 0.15) * height
    if x_min < 0:
        x_min = 0
    if y_min < 0:
        y_min = 0
    if x_max < 0:
        x_max = 0
    if y_max < 0:
        y_max = 0
    x_min -= k * abs(x_max - x_min)
    y_min -= k * abs(y_max - y_min)
    x_max += k * abs(x_max - x_min)
    y_max += k * abs(y_max - y_min)
    # x_min = eye_x - 0.15
    # y_min = eye_y - 0.15
    # x_max = eye_x + 0.15
    # y_max = eye_y + 0.15
    # if x_min < 0: x_min = 0
    # if y_min < 0: y_min = 0
    # if x_max < 0: x_max = 0
    # if y_max < 0: y_max = 0
    # print('bbx', [x_min, y_min, x_max, y_max])
    x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])
    # print(x_min, y_min, x_max, y_max)

    if self.imshow:
        img.save("origin_img.jpg")

    if self.test:
        imsize = torch.IntTensor([width, height])
    else:
        ## data augmentation
        # Jitter (expansion-only) bounding box size
        if np.random.random_sample() <= 0.5:
            k = np.random.random_sample() * 0.2
            x_min -= k * abs(x_max - x_min)
            y_min -= k * abs(y_max - y_min)
            x_max += k * abs(x_max - x_min)
            y_max += k * abs(y_max - y_min)

        # Random crop
        if np.random.random_sample() <= 0.5:
            # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
            crop_x_min = np.min([gaze_x * width, x_min, x_max])
            crop_y_min = np.min([gaze_y * height, y_min, y_max])
            crop_x_max = np.max([gaze_x * width, x_min, x_max])
            crop_y_max = np.max([gaze_y * height, y_min, y_max])

            # Randomly select a top left corner
            if crop_x_min >= 0:
                crop_x_min = np.random.uniform(0, crop_x_min)
            if crop_y_min >= 0:
                crop_y_min = np.random.uniform(0, crop_y_min)

            # Find the range of valid crop width and height starting from (crop_x_min, crop_y_min)
            crop_width_min = crop_x_max - crop_x_min
            crop_height_min = crop_y_max - crop_y_min
            crop_width_max = width - crop_x_min
            crop_height_max = height - crop_y_min
            # Randomly select a width and a height
            crop_width = np.random.uniform(crop_width_min, crop_width_max)
            crop_height = np.random.uniform(crop_height_min, crop_height_max)

            # Crop it
            img = TF.crop(img, crop_y_min, crop_x_min, crop_height, crop_width)

            # Record the crop's (x, y) offset
            offset_x, offset_y = crop_x_min, crop_y_min

            # convert coordinates into the cropped frame
            x_min, y_min, x_max, y_max = \
                x_min - offset_x, y_min - offset_y, x_max - offset_x, y_max - offset_y
            # if gaze_inside:
            gaze_x, gaze_y = (gaze_x * width - offset_x) / float(crop_width), \
                             (gaze_y * height - offset_y) / float(crop_height)
            # else:
            #     gaze_x = -1; gaze_y = -1

            width, height = crop_width, crop_height

        # Random flip
        if np.random.random_sample() <= 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            x_max_2 = width - x_min
            x_min_2 = width - x_max
            x_max = x_max_2
            x_min = x_min_2
            gaze_x = 1 - gaze_x

        # Random color change
        if np.random.random_sample() <= 0.5:
            img = TF.adjust_brightness(img, brightness_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_contrast(img, contrast_factor=np.random.uniform(0.5, 1.5))
            img = TF.adjust_saturation(img, saturation_factor=np.random.uniform(0, 1.5))

    # print('bbx2', [x_min, y_min, x_max, y_max])
    head_channel = imutils.get_head_box_channel(
        x_min, y_min, x_max, y_max, width, height,
        resolution=self.input_size, coordconv=False).unsqueeze(0)

    # Crop the face
    face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

    if self.imshow:
        img.save("img_aug.jpg")
        face.save('face_aug.jpg')

    if self.transform is not None:
        img = self.transform(img)
        face = self.transform(face)
    # print('imsize2', img.size())

    # generate the heat map used for deconv prediction
    gaze_heatmap = torch.zeros(self.output_size, self.output_size)  # set the size of the output
    # print([gaze_x * self.output_size, gaze_y * self.output_size])
    # print(self.output_size)
    if self.test:  # aggregated heatmap
        num_valid = 0
        for gaze_x, gaze_y in cont_gaze:
            if gaze_x != -1:
                num_valid += 1
                gaze_heatmap = imutils.draw_labelmap(
                    gaze_heatmap,
                    [gaze_x * self.output_size, gaze_y * self.output_size],
                    3, type='Gaussian')
        gaze_heatmap /= num_valid
    else:
        # if gaze_inside:
        gaze_heatmap = imutils.draw_labelmap(
            gaze_heatmap,
            [gaze_x * self.output_size, gaze_y * self.output_size],
            3, type='Gaussian')

    if self.imshow:
        fig = plt.figure(111)
        img = 255 - imutils.unnorm(img.numpy()) * 255
        img = np.clip(img, 0, 255)
        plt.imshow(np.transpose(img, (1, 2, 0)))
        plt.imshow(imresize(gaze_heatmap, (self.input_size, self.input_size)), cmap='jet', alpha=0.3)
        plt.imshow(imresize(1 - head_channel.squeeze(0), (self.input_size, self.input_size)), alpha=0.2)
        plt.savefig('viz_aug.png')

    if self.test:
        return img, face, head_channel, gaze_heatmap, cont_gaze, imsize, path
    else:
        return img, face, head_channel, gaze_heatmap, path, gaze_inside
def torchvision(self, img):
    return torchvision.crop(img, i=0, j=0, h=64, w=64)
def __call__(self, img: Image.Image):
    # Sample the crop parameters once, then reuse them on every subsequent call
    if self.params is None:
        self.params = T.RandomCrop.get_params(img, self.out_size)
    return TF.crop(img, *self.params)
def __call__(self, imgs):
    i, j, h, w = self.get_params(imgs[0], self.size)
    out = [F.crop(img, i, j, h, w) for img in imgs]
    if random.random() < 0.5:
        out = [F.hflip(img) for img in out]
    return [self.transformer(img) for img in out]
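Several snippets here share the same pattern: draw the random parameters once with get_params, then apply the identical crop (and flip) to every image so paired inputs stay pixel-aligned. A minimal sketch of the pattern with illustrative sizes:

import random
import torchvision.transforms as T
import torchvision.transforms.functional as F
from PIL import Image

img = Image.new("RGB", (320, 240))
mask = Image.new("L", (320, 240))

# One draw of parameters, applied identically to image and mask.
i, j, h, w = T.RandomCrop.get_params(img, output_size=(224, 224))
img, mask = F.crop(img, i, j, h, w), F.crop(mask, i, j, h, w)
if random.random() < 0.5:
    img, mask = F.hflip(img), F.hflip(mask)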
def __call__(self, img, label):
    # Random 224x224 crop; offsets in [0, 32] suit inputs of at least 256x256
    crop_x = np.random.randint(low=0, high=33)
    crop_y = np.random.randint(low=0, high=33)
    return F.crop(img, crop_y, crop_x, 224, 224), label
def __call__(self, images):
    name = images["name"]
    x = images["x"]
    y = images["y"]
    if self.nuc:
        n = images["n"]

    # check whether y contains nucleoli and reset the process if it does not
    if self.Random_Crops:
        wpix = 0
        while wpix < self.Wpix_Threshold:
            # Random crops
            i, j, h, w = transforms.RandomCrop.get_params(x, output_size=self.crop_size)
            x = TF.crop(x, i, j, h, w)
            y = TF.crop(y, i, j, h, w)
            y_array = np.array(y) / 255
            if self.nuc:
                n = TF.crop(n, i, j, h, w)
            wpix = np.sum(y_array) / (y_array.shape[0] * y_array.shape[1])
            if wpix < self.Wpix_Threshold:
                local_keep_prob = random.random()
                if local_keep_prob < self.Keep_Prob:
                    break
                else:
                    x = images["x"]
                    y = images["y"]

    if self.augment:
        # Random horizontal flip
        if random.random() > 0.5:
            x = TF.hflip(x)
            y = TF.hflip(y)
            if self.nuc:
                n = TF.hflip(n)
        # Random vertical flip
        if random.random() > 0.5:
            x = TF.vflip(x)
            y = TF.vflip(y)
            if self.nuc:
                n = TF.vflip(n)
        # Random rotation
        d = random.randint(-180, 180)
        x = TF.rotate(x, d)
        y = TF.rotate(y, d)
        if self.nuc:
            n = TF.rotate(n, d)

    x = TF.to_tensor(x)
    y = TF.to_tensor(y)
    if self.nuc:
        n = TF.to_tensor(n)

    if not self.nuc:
        pair = {"x": x, "y": y, "name": name}
    else:
        pair = {"x": x, "y": y, "n": n, "name": name}
    return pair
def crop_image(image_np, coors, crop_size=224):
    image_pil = Image.fromarray(image_np)
    # coors is an (x, y) point; reversing it yields the (row, col) order that
    # F.crop expects, and subtracting half the crop size gives (top, left)
    top_left = [max(0, c - crop_size // 2) for c in coors[::-1]]
    image_crop = crop(image_pil, *top_left, crop_size, crop_size)
    return image_crop
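A hypothetical call to crop_image, assuming crop is torchvision.transforms.functional.crop and coors is an (x, y) centre point:

import numpy as np

# Crop a 224x224 patch centred on the point (x=300, y=120) of a 480x640 frame.
frame = np.zeros((480, 640, 3), dtype=np.uint8)
patch = crop_image(frame, (300, 120))
print(patch.size)  # (224, 224)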
# Image size: 320 x 240
letters = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y'
]
tt = transforms.ToTensor()
improve_letters = ['q']
source_path = "./test"
dest_path = "./test2"
for letter in improve_letters:
    for i in range(20):
        image1 = Image.open(os.path.join(source_path, letter, str(i) + "_1.png"))
        image2 = Image.open(os.path.join(source_path, letter, str(i) + "_2.png"))
        # Cut a 240x240 square out of each 320x240 frame, starting 15 px from the left
        image1 = functional.crop(image1, 0, 15, 240, 240)
        image2 = functional.crop(image2, 0, 15, 240, 240)
        utils.save_image(tt(image1), os.path.join(dest_path, letter, str(i) + "_1.png"))
        utils.save_image(tt(image2), os.path.join(dest_path, letter, str(i) + "_2.png"))
def __init__(self, dataroot, train=True, augment=True):
    self.images = []
    self.bubbles = []
    self.labels = []
    dataset = []  # contains tuples of images and associated 360 bubbles

    if train:
        file = np.loadtxt(os.path.join(dataroot, "dataset_train.txt"), dtype=str, skiprows=3)
    else:
        file = np.loadtxt(os.path.join(dataroot, "dataset_test.txt"), dtype=str, skiprows=3)

    # load image pairs and create training/validation labels
    for pair in file:
        if 'right' in pair[1]:
            self.labels.extend([i for i in range(3, 6)])
        elif 'left' in pair[1]:
            self.labels.extend([i for i in range(0, 3)])
        dataset.extend([pair for i in range(3)])

    # calculate dataset length
    self.data_len = len(dataset)

    # transformations when loading images:
    PIL = transforms.ToPILImage()
    resize = transforms.Compose([transforms.Resize(300)])
    bub_size = transforms.Resize(500)
    if augment:
        self.image_trans = transforms.Compose([
            transforms.RandomCrop(224),
            transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.05),
            transforms.ToTensor()
        ])
        self.bubble_trans = transforms.Compose([
            transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.05),
            transforms.ToTensor()
        ])
    else:
        self.image_trans = transforms.Compose([transforms.Resize(300), transforms.ToTensor()])
        self.bubble_trans = transforms.Compose([transforms.ToTensor()])

    # load images and apply initial preprocessing
    for i, (bubble, image) in enumerate(dataset):
        image = io.imread(os.path.join(dataroot, image), plugin='matplotlib')
        bubble = io.imread(os.path.join(dataroot, bubble), plugin='matplotlib')
        label = torch.tensor(self.labels[i])

        # cropping parameters in height and width; this assumes images of shape (2100, 2800)
        params = [1300, 1300]

        # set left pixel of the image crop depending on label
        if label == 0 or label == 3:
            width = 0
        elif label == 1 or label == 4:
            width = int((image.shape[1] - params[1]) / 2)
        elif label == 2 or label == 5:
            width = int(image.shape[1] - (params[1] + 1))

        # set top of the image crop
        height = int((image.shape[0] - params[0]) / 2)

        # pre-process image files
        image = PIL(image)
        image = TF.crop(image, height, width, params[0], params[1])
        image = resize(image)
        self.images.append(image)

        # preprocess bubble and add to the array
        bubble = PIL(bubble)
        bubble = bub_size(bubble)
        self.bubbles.append(bubble)
def __call__(self, sample):
    image, label, label_c, label_m, label_gt = \
        sample['image'], sample['label'], sample['label_c'], sample['label_m'], sample['label_gt']
    if self.precise_contour:
        pil_masks = sample['pil_masks']
    weight = None

    if self.augment:
        if self.color_equalize and random.random() > 0.5:
            image = clahe(image)

        # perform RandomResize(), or just enlarge when the image size < model input size
        if random.random() > 0.5:
            new_size = int(random.uniform(self.min_scale, self.max_scale) * np.min(image.size))
        else:
            new_size = int(np.min(image.size))
        if new_size < np.max(self.size):  # make it viable for cropping
            new_size = int(np.max(self.size))
        image, label, label_c, label_m = [
            tx.resize(x, new_size) for x in (image, label, label_c, label_m)
        ]
        if self.precise_contour:
            # regenerate all resized masks (bilinear interpolation) and compose them afterwards
            pil_masks = [tx.resize(m, new_size) for m in pil_masks]
            label_gt = compose_mask(pil_masks, pil=True)
        else:
            # label_gt uses NEAREST instead of BILINEAR (default) to avoid polluting instance labels after augmentation
            label_gt = tx.resize(label_gt, new_size, interpolation=Image.NEAREST)

        # perform RandomCrop()
        i, j, h, w = transforms.RandomCrop.get_params(image, self.size)
        image, label, label_c, label_m, label_gt = [
            tx.crop(x, i, j, h, w) for x in (image, label, label_c, label_m, label_gt)
        ]
        if self.precise_contour:
            pil_masks = [tx.crop(m, i, j, h, w) for m in pil_masks]

        # Note: RandomResizedCrop() is popularly used to train the Inception networks,
        # but it might not be the best choice for segmentation.
        # # perform RandomResizedCrop()
        # i, j, h, w = transforms.RandomResizedCrop.get_params(
        #     image,
        #     scale=(0.5, 1.0),
        #     ratio=(3. / 4., 4. / 3.)
        # )
        # # label_gt uses NEAREST instead of BILINEAR (default) to avoid polluting instance labels after augmentation
        # image, label, label_c, label_m = [tx.resized_crop(x, i, j, h, w, self.size) for x in (image, label, label_c, label_m)]
        # label_gt = tx.resized_crop(label_gt, i, j, h, w, self.size, interpolation=Image.NEAREST)

        # perform Elastic Distortion
        if self.elastic_distortion and random.random() > 0.75:
            indices = ElasticDistortion.get_params(image)
            image, label, label_c, label_m = [
                ElasticDistortion.transform(x, indices) for x in (image, label, label_c, label_m)
            ]
            if self.precise_contour:
                pil_masks = [ElasticDistortion.transform(m, indices) for m in pil_masks]
                label_gt = compose_mask(pil_masks, pil=True)
            else:
                # spline_order=0 to avoid polluting instance labels
                label_gt = ElasticDistortion.transform(label_gt, indices, spline_order=0)

        # perform RandomHorizontalFlip()
        if random.random() > 0.5:
            image, label, label_c, label_m, label_gt = [
                tx.hflip(x) for x in (image, label, label_c, label_m, label_gt)
            ]
        # perform RandomVerticalFlip()
        if random.random() > 0.5:
            image, label, label_c, label_m, label_gt = [
                tx.vflip(x) for x in (image, label, label_c, label_m, label_gt)
            ]

        # perform Random Rotation (0, 90, 180, and 270 degrees)
        random_degree = random.randint(0, 3) * 90
        image, label, label_c, label_m, label_gt = [
            tx.rotate(x, random_degree) for x in (image, label, label_c, label_m, label_gt)
        ]

        # perform random color invert, assuming 3-channel (RGB) images
        if self.color_invert and random.random() > 0.5:
            image = ImageOps.invert(image)

        # perform ColorJitter()
        if self.color_jitter and random.random() > 0.5:
            color = transforms.ColorJitter.get_params(0.5, 0.5, 0.5, 0.25)
            image = color(image)
    elif self.resize:  # resize down image
        image, label, label_c, label_m = [
            tx.resize(x, self.size) for x in (image, label, label_c, label_m)
        ]
        if self.precise_contour:
            pil_masks = [tx.resize(m, self.size) for m in pil_masks]
            label_gt = compose_mask(pil_masks, pil=True)
        else:
            label_gt = tx.resize(label_gt, self.size, interpolation=Image.NEAREST)

    # replace with a 'thinner' contour based on the augmented/transformed mask
    if self.detect_contour:
        label_c, label_m, weight = get_instances_contour_interior(np.asarray(label_gt))
        label_c, label_m = Image.fromarray(label_c), Image.fromarray(label_m)

    # The resize algorithm may introduce anti-aliased edges, i.e. non-binary values,
    # so map every pixel back to 0 or 255
    if self.label_binary:
        label, label_c, label_m = [
            x.point(lambda p, threshold=100: 255 if p > threshold else 0)
            for x in (label, label_c, label_m)
        ]

    # For train-contour-only mode, leverage the merged instances contour label (label_c);
    # the side effect is losing instance count information
    if self.only_contour:
        label_gt = label_c

    # perform ToTensor()
    if self.tensor:
        image, label, label_c, label_m, label_gt = \
            [tx.to_tensor(x) for x in (image, label, label_c, label_m, label_gt)]
        # perform Normalize()
        image = tx.normalize(image, self.mean, self.std)

    # prepare a shallow copy of the composed data to avoid corrupting cached data
    x = sample.copy()
    x['image'], x['label'], x['label_c'], x['label_m'], x['label_gt'] = \
        image, label, label_c, label_m, label_gt
    if self.weight_map and weight is not None:
        weight = np.expand_dims(weight, 0)
        x['weight'] = torch.from_numpy(weight)
    if 'pil_masks' in x:
        del x['pil_masks']
    return x
def __call__(self, img):
    i, j, h, w = self.params
    return F.crop(img, i, j, h, w)
def __call__(self, data):
    hr, lr = data
    # setting_window returns the crop rectangle; note that F.crop interprets
    # the first two values as (top, left)
    x, y, h, w = self.setting_window(hr, self.crop_size)
    return F.crop(hr, x, y, h, w), F.crop(lr, x, y, h, w)
def get_indexes(mask_height, boxes_params, batch_size, steps):
    pi = torch.as_tensor(np.pi)
    batch_edges = []
    batch_edges_left = []
    batch_edges_right = []
    batch_edges_top = []
    batch_edges_bottom = []
    for i in range(batch_size):
        img = mask_height[i]
        edges = []
        left_edges = []
        right_edges = []
        top_edges = []
        bottom_edges = []
        for j in range(steps):
            y = boxes_params[i][j][0]
            x = boxes_params[i][j][1]
            angle = boxes_params[i][j][2]
            width = boxes_params[i][j][3]
            length = boxes_params[i][j][4]
            # If the width is too small, treat the box as infeasible right away
            if width < 5:
                edges.append(0)
                left_edges.append(0)
                right_edges.append(0)
                top_edges.append(0)
                bottom_edges.append(0)
                continue
            top = int(y - length / 2)
            left = int(x - width / 2)
            rt_angle = -float((angle / pi * 180))
            rectified_img = VisionF.rotate(img=img.view(1, 1, 300, 300), angle=rt_angle, center=(x, y))
            crop_img = VisionF.crop(rectified_img, top, left, int(length), int(width))
            resized_img = VisionF.resize(crop_img, [50, 100]).squeeze().cpu().data.numpy()

            # Measure the edge width on each side of the patch
            edge, edge_left, edge_right, edge_top, edge_bottom = get_edge(resized_img)

            edges.append(edge)
            left_edges.append(edge_left)
            right_edges.append(edge_right)
            top_edges.append(edge_top)
            bottom_edges.append(edge_bottom)

            if edge * width / 100 > 3:
                break
            # If this is the first candidate and a collision-free region already exists,
            # stop searching and optimise this one; same idea as before, effectively an
            # up-front feasibility check
            if j == 1 and edge > 0:
                break
        batch_edges.append(edges)
        batch_edges_left.append(left_edges)
        batch_edges_right.append(right_edges)
        batch_edges_top.append(top_edges)
        batch_edges_bottom.append(bottom_edges)

    # Pick the best parameters for each image from the candidates
    indexes = []
    # A state table characterising the direction of the position optimisation
    directions = []
    for edges, left_edges, right_edges, top_edges, bottom_edges in zip(
            batch_edges, batch_edges_left, batch_edges_right, batch_edges_top, batch_edges_bottom):
        index = np.argmax(edges)
        if np.max(edges) == 0:
            edges_lr = (left_edges + right_edges)
            if max(edges_lr) > 0:
                index = np.argmax(edges_lr)
                if index >= len(edges):
                    index = index - len(edges)
        indexes.append(index)

        direction = 0
        # Determine the direction of the position optimisation by comparing the edge widths of each side
        edge_range = max(left_edges[index], right_edges[index])
        if abs(left_edges[index] - right_edges[index]) > edge_range // 2:
            direction = 1 if left_edges[index] > right_edges[index] else 2
        edge_range = max(top_edges[index], bottom_edges[index])
        if abs(top_edges[index] - bottom_edges[index]) > edge_range // 2:
            direction = 3 if top_edges[index] > bottom_edges[index] else 4
        directions.append(direction)

    return indexes, batch_edges, batch_edges_left, batch_edges_right, batch_edges_top, batch_edges_bottom, directions
def transform(self, image_a, image_b, mask, semantic_a, semantic_b):
    """Apply transformations to the images and the corresponding mask.

    Transformations applied: random horizontal flipping, resizing, random cropping and normalizing.

    Arguments:
        image_a {Image} -- Image
        image_b {Image} -- Image
        mask {Image} -- Mask

    Returns:
        image_a, image_b, mask {Image, Image, Image} -- transformed image_a, pair image_b and mask
    """
    # Random horizontal flipping
    if torch.rand(1) > 0.5:
        image_a = image_a.transpose(Image.FLIP_LEFT_RIGHT)
        image_b = image_b.transpose(Image.FLIP_LEFT_RIGHT)
        mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
        semantic_a = semantic_a.transpose(Image.FLIP_LEFT_RIGHT)
        semantic_b = semantic_b.transpose(Image.FLIP_LEFT_RIGHT)
    # print('debugging mask transform 2 size', mask.size)

    # Resize
    resize = transforms.Resize(size=self.new_size)
    image_a = resize(image_a)
    image_b = resize(image_b)
    # print('dim image after resize', image.size)

    # Resize mask and semantics with nearest-neighbour interpolation
    mask = mask.resize((image_b.width, image_b.height), Image.NEAREST)
    semantic_a = semantic_a.resize((image_b.width, image_b.height), Image.NEAREST)
    semantic_b = semantic_b.resize((image_b.width, image_b.height), Image.NEAREST)
    # print('debugging mask transform 3 size', mask.size)

    # Random crop
    i, j, h, w = transforms.RandomCrop.get_params(image_b, output_size=(self.height, self.width))
    image_a = F.crop(image_a, i, j, h, w)
    image_b = F.crop(image_b, i, j, h, w)
    mask = F.crop(mask, i, j, h, w)
    semantic_a = F.crop(semantic_a, i, j, h, w)
    semantic_b = F.crop(semantic_b, i, j, h, w)
    # print('debugging mask transform 4 size', mask.size)

    # Transform to tensor
    to_tensor = transforms.ToTensor()
    image_a = to_tensor(image_a)
    image_b = to_tensor(image_b)
    semantic_a = to_tensor(semantic_a) * 255  # to_tensor clips to [0, 1]
    semantic_b = to_tensor(semantic_b) * 255
    semantic_a = mapping(semantic_a)
    semantic_b = mapping(semantic_b)
    if np.max(mask) == 1:
        mask = to_tensor(mask) * 255
    else:
        mask = to_tensor(mask)
    mask[mask > 0.5] = 1
    mask[mask < 0.5] = 0
    # print('debugging mask transform 5 size', mask.size)

    # Normalize
    normalizer = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    image_a = normalizer(image_a)
    image_b = normalizer(image_b)
    # print(torch.unique(mask))
    # print(torch.unique(semantic_a))
    return image_a, image_b, mask, semantic_a, semantic_b
def forward(
    self,
    image: Tensor,
    target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if target is None:
        raise ValueError("The targets can't be None for this transform.")

    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    orig_w, orig_h = F.get_image_size(image)

    while True:
        # sample an option
        idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
        min_jaccard_overlap = self.options[idx]
        if min_jaccard_overlap >= 1.0:  # a value larger than 1 encodes the leave-as-is option
            return image, target

        for _ in range(self.trials):
            # check the aspect ratio limitations
            r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
            new_w = int(orig_w * r[0])
            new_h = int(orig_h * r[1])
            aspect_ratio = new_w / new_h
            if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
                continue

            # check for 0 area crops
            r = torch.rand(2)
            left = int((orig_w - new_w) * r[0])
            top = int((orig_h - new_h) * r[1])
            right = left + new_w
            bottom = top + new_h
            if left == right or top == bottom:
                continue

            # check for any valid boxes with centers within the crop area
            cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
            cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
            is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
            if not is_within_crop_area.any():
                continue

            # check at least 1 box against the jaccard limitations
            boxes = target["boxes"][is_within_crop_area]
            ious = torchvision.ops.boxes.box_iou(
                boxes,
                torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device))
            if ious.max() < min_jaccard_overlap:
                continue

            # keep only valid boxes and perform cropping
            target["boxes"] = boxes
            target["labels"] = target["labels"][is_within_crop_area]
            target["boxes"][:, 0::2] -= left
            target["boxes"][:, 1::2] -= top
            target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
            target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
            image = F.crop(image, top, left, new_h, new_w)
            return image, target
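A hedged sketch of how this SSD-style random IoU crop could be wired up: the class below is hypothetical, its attribute values mirror common SSD defaults, and the forward above is assumed to be in scope along with its typing imports (Tensor, Optional, Dict, Tuple) and torchvision >= 0.10 for F.get_image_size:

import torch
import torchvision
from torchvision.transforms import functional as F

class RandomIoUCropSketch(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Sampled min-IoU choices; a value >= 1.0 encodes "leave as-is".
        self.options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
        self.trials = 40
        self.min_scale, self.max_scale = 0.3, 1.0
        self.min_aspect_ratio, self.max_aspect_ratio = 0.5, 2.0

    forward = forward  # reuse the method defined above

image = torch.zeros(3, 480, 640)
target = {"boxes": torch.tensor([[50., 60., 200., 220.]]), "labels": torch.tensor([1])}
image, target = RandomIoUCropSketch()(image, target)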
def torchvision_transform(self, img):
    return torchvision.crop(img, top=0, left=0, height=64, width=64)
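These benchmark-style snippets show both generations of the crop API: older torchvision used positional parameters named (i, j, h, w), while releases from around 0.8 renamed them to (top, left, height, width). A small compatibility sketch, assuming the functional module is imported under an alias as in the snippets above:

import torchvision.transforms.functional as TF

def crop64(img):
    # Newer torchvision keyword names; on older releases the equivalent
    # positional call is TF.crop(img, 0, 0, 64, 64) with params (i, j, h, w).
    return TF.crop(img, top=0, left=0, height=64, width=64)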