Example #1
    def __call__(self, image):
        image = [self.preprocess(c) for c in image]

        i, j, h, w = self.get_params(image[0], self.size)

        return [F.crop(c, i, j, h, w) for c in image]
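A self-contained sketch of the transform class this __call__ could belong to (the class name, the preprocess hook and the constructor are assumptions, not part of the snippet):

import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from PIL import Image


class GroupRandomCrop:
    """Crop every image in a list with one shared random window."""

    def __init__(self, size, preprocess=None):
        self.size = size                                # (height, width) of the crop
        self.preprocess = preprocess or (lambda x: x)   # optional per-image hook
        self.get_params = transforms.RandomCrop.get_params

    def __call__(self, image):
        image = [self.preprocess(c) for c in image]
        # sample the window once so all frames stay aligned
        i, j, h, w = self.get_params(image[0], self.size)
        return [F.crop(c, i, j, h, w) for c in image]


frames = [Image.new('RGB', (320, 240)) for _ in range(4)]
crops = GroupRandomCrop((224, 224))(frames)   # four identically cropped frames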
Example #2
 def __call__(self, image):
     image = F.crop(image, self.top, self.left, self.height, self.width)
     return image
Example #3
 def fun(o):
     return F.crop(o, i, j, h, w)
Example #4
def transform(image,
              label,
              logits=None,
              crop_size=(512, 512),
              scale_size=(0.8, 1.0),
              augmentation=True):
    # Random rescale image
    raw_w, raw_h = image.size
    scale_ratio = random.uniform(scale_size[0], scale_size[1])

    resized_size = (int(raw_h * scale_ratio), int(raw_w * scale_ratio))
    image = transforms_f.resize(image, resized_size, Image.BILINEAR)
    label = transforms_f.resize(label, resized_size, Image.NEAREST)
    if logits is not None:
        logits = transforms_f.resize(logits, resized_size, Image.NEAREST)

    # Add padding if rescaled image size is less than crop size
    if crop_size == -1:  # use original im size without crop or padding
        crop_size = (raw_h, raw_w)

    if crop_size[0] > resized_size[0] or crop_size[1] > resized_size[1]:
        right_pad, bottom_pad = max(crop_size[1] - resized_size[1],
                                    0), max(crop_size[0] - resized_size[0], 0)
        image = transforms_f.pad(image,
                                 padding=(0, 0, right_pad, bottom_pad),
                                 padding_mode='reflect')
        label = transforms_f.pad(label,
                                 padding=(0, 0, right_pad, bottom_pad),
                                 fill=255,
                                 padding_mode='constant')
        if logits is not None:
            logits = transforms_f.pad(logits,
                                      padding=(0, 0, right_pad, bottom_pad),
                                      fill=0,
                                      padding_mode='constant')

    # Random Cropping
    i, j, h, w = transforms.RandomCrop.get_params(image, output_size=crop_size)
    image = transforms_f.crop(image, i, j, h, w)
    label = transforms_f.crop(label, i, j, h, w)
    if logits is not None:
        logits = transforms_f.crop(logits, i, j, h, w)

    if augmentation:
        # Random color jitter
        if torch.rand(1) > 0.2:
            color_transform = transforms.ColorJitter.get_params(
                (0.75, 1.25), (0.75, 1.25), (0.75, 1.25), (-0.25, 0.25))
            image = color_transform(image)

        # Random Gaussian filter
        if torch.rand(1) > 0.5:
            sigma = random.uniform(0.15, 1.15)
            image = image.filter(ImageFilter.GaussianBlur(radius=sigma))

        # Random horizontal flipping
        if torch.rand(1) > 0.5:
            image = transforms_f.hflip(image)
            label = transforms_f.hflip(label)
            if logits is not None:
                logits = transforms_f.hflip(logits)

    # Transform to tensor
    image = transforms_f.to_tensor(image)
    label = (transforms_f.to_tensor(label) * 255).long()
    label[label == 255] = -1  # invalid pixels are re-mapped to index -1
    if logits is not None:
        logits = transforms_f.to_tensor(logits)

    # Apply (ImageNet) normalisation
    image = transforms_f.normalize(image,
                                   mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225])
    if logits is not None:
        return image, label, logits
    else:
        return image, label
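A hedged usage sketch for this function; the import aliases (transforms_f for the functional API) and the sample file names below are assumptions about the surrounding module:

import random

import torch
from PIL import Image, ImageFilter
import torchvision.transforms as transforms
import torchvision.transforms.functional as transforms_f

# hypothetical inputs: an RGB photo and a single-channel index mask of the same size
image = Image.open('frame.jpg').convert('RGB')
label = Image.open('frame_mask.png')

img_t, lbl_t = transform(image, label,
                         crop_size=(512, 512),
                         scale_size=(0.8, 1.0),
                         augmentation=True)
print(img_t.shape, lbl_t.shape)   # e.g. torch.Size([3, 512, 512]) torch.Size([1, 512, 512])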
Example #5
 def __call__(self, img):
     return F.crop(img, self.x1, self.y1, self.x2 - self.x1,
                   self.y2 - self.y1)
Example #6
def fit(source_image: Image.Image,
        target_size: Union[Tuple[int, int], int],
        fitting_mode="crop") -> Image.Image:
    """
    Args:
        source_image: PIL Image
        target_size: Tuple of ints (height, width) or single int for square target
        fitting_mode: Either 'crop' or 'pad'.
    """
    source_width, source_height = source_image.size
    if isinstance(target_size, int):
        target_height, target_width = target_size, target_size
    elif isinstance(target_size, tuple) and len(target_size) == 2:
        target_height, target_width = target_size
    else:
        raise TypeError("invalid type of target_size")

    source_ratio = source_height / source_width
    target_ratio = target_height / target_width

    target_image = None
    box_xmin, box_ymin, box_xmax, box_ymax = None, None, None, None

    if fitting_mode == "crop":
        if source_ratio == target_ratio:
            # simple resize
            target_image = resize(source_image, (
                target_height,
                target_width,
            ))
        elif source_ratio > target_ratio:
            # align width, then crop
            overheight = int(source_height * (target_width / source_width))

            target_image = resize(source_image, (overheight, target_width))

            target_image = crop(target_image,
                                int((overheight - target_height) / 2), 0,
                                target_height, target_width)

        elif source_ratio < target_ratio:
            # align height, then crop
            overwidth = int(source_width * (target_height / source_height))

            target_image = resize(source_image, (target_height, overwidth))

            target_image = crop(target_image, 0,
                                int((overwidth - target_width) / 2),
                                target_height, target_width)

            # TODO: Implement crop box info if wanted

    elif fitting_mode == "pad":
        if source_ratio == target_ratio:
            # simple resize
            target_image = resize(source_image, (
                target_height,
                target_width,
            ))

            box_xmin, box_ymin = 0, 0
            box_xmax, box_ymax = target_width - 1, target_height - 1

        elif source_ratio > target_ratio:
            # align height, then pad
            underwidth = int(source_width * (target_height / source_height))

            target_image = resize(source_image, (target_height, underwidth))

            target_image = pad(
                target_image,

                # add 1 in case (target_width - underwidth) is odd, so that the output has the desired width
                padding=(  # left, top, right, bottom
                    int((target_width - underwidth) / 2), 0,
                    int((target_width - underwidth) / 2) +
                    (target_width - underwidth) % 2, 0))

            box_xmin, box_ymin = int((target_width - underwidth) / 2), 0
            box_xmax, box_ymax = int((target_width - underwidth) /
                                     2) + underwidth - 1, target_height - 1

        elif source_ratio < target_ratio:
            # align width, then pad
            underheight = int(source_height * (target_width / source_width))

            target_image = resize(source_image, (underheight, target_width))

            target_image = pad(
                target_image,

                # add 1 in case (target_height - underheight) is odd, so that the output has the desired height
                padding=(  # left, top, right, bottom
                    0, int((target_height - underheight) / 2), 0,
                    int((target_height - underheight) / 2) +
                    (target_height - underheight) % 2))

            box_xmin, box_ymin = 0, int((target_height - underheight) / 2)
            box_xmax, box_ymax = target_width - 1, int(
                (target_height - underheight) / 2) + underheight - 1

    assert target_image.size[0] == target_width
    assert target_image.size[1] == target_height

    return target_image, box_xmin, box_ymin, box_xmax, box_ymax
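A possible call, assuming resize, crop and pad in the function body refer to torchvision's functional helpers and a suitable input file exists:

from typing import Tuple, Union

from PIL import Image
from torchvision.transforms.functional import crop, pad, resize

img = Image.open('photo.jpg').convert('RGB')           # hypothetical input
fitted, xmin, ymin, xmax, ymax = fit(img, (224, 224), fitting_mode='pad')
print(fitted.size)             # (224, 224), reported as (width, height) by PIL
print(xmin, ymin, xmax, ymax)  # box of the un-padded content ('pad' mode only)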
Example #7
    def __getitem__(self, index):
        sequence_path = self.all_sequence_paths[index]
        df = pd.read_csv(
            sequence_path,
            header=None,
            index_col=False,
            names=['path', 'xmin', 'ymin', 'xmax', 'ymax', 'gazex', 'gazey'])
        show_name = sequence_path.split('/')[-3]
        clip = sequence_path.split('/')[-2]
        seq_len = len(df.index)

        # moving-avg smoothing
        window_size = 11  # should be odd number
        df['xmin'] = myutils.smooth_by_conv(window_size, df, 'xmin')
        df['ymin'] = myutils.smooth_by_conv(window_size, df, 'ymin')
        df['xmax'] = myutils.smooth_by_conv(window_size, df, 'xmax')
        df['ymax'] = myutils.smooth_by_conv(window_size, df, 'ymax')

        if not self.test:
            # cond for data augmentation
            cond_jitter = np.random.random_sample()
            cond_flip = np.random.random_sample()
            cond_color = np.random.random_sample()
            if cond_color < 0.5:
                n1 = np.random.uniform(0.5, 1.5)
                n2 = np.random.uniform(0.5, 1.5)
                n3 = np.random.uniform(0.5, 1.5)
            cond_crop = np.random.random_sample()

            # if longer than seq_len_limit, cut it down to the limit with the init index randomly sampled
            if seq_len > self.seq_len_limit:
                sampled_ind = np.random.randint(0,
                                                seq_len - self.seq_len_limit)
                seq_len = self.seq_len_limit
            else:
                sampled_ind = 0

            if cond_crop < 0.5:
                sliced_x_min = df['xmin'].iloc[sampled_ind:sampled_ind +
                                               seq_len]
                sliced_x_max = df['xmax'].iloc[sampled_ind:sampled_ind +
                                               seq_len]
                sliced_y_min = df['ymin'].iloc[sampled_ind:sampled_ind +
                                               seq_len]
                sliced_y_max = df['ymax'].iloc[sampled_ind:sampled_ind +
                                               seq_len]

                sliced_gaze_x = df['gazex'].iloc[sampled_ind:sampled_ind +
                                                 seq_len]
                sliced_gaze_y = df['gazey'].iloc[sampled_ind:sampled_ind +
                                                 seq_len]

                check_sum = sliced_gaze_x.sum() + sliced_gaze_y.sum()
                all_outside = check_sum == -2 * seq_len

                # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
                if all_outside:
                    crop_x_min = np.min(
                        [sliced_x_min.min(),
                         sliced_x_max.min()])
                    crop_y_min = np.min(
                        [sliced_y_min.min(),
                         sliced_y_max.min()])
                    crop_x_max = np.max(
                        [sliced_x_min.max(),
                         sliced_x_max.max()])
                    crop_y_max = np.max(
                        [sliced_y_min.max(),
                         sliced_y_max.max()])
                else:
                    crop_x_min = np.min([
                        sliced_gaze_x.min(),
                        sliced_x_min.min(),
                        sliced_x_max.min()
                    ])
                    crop_y_min = np.min([
                        sliced_gaze_y.min(),
                        sliced_y_min.min(),
                        sliced_y_max.min()
                    ])
                    crop_x_max = np.max([
                        sliced_gaze_x.max(),
                        sliced_x_min.max(),
                        sliced_x_max.max()
                    ])
                    crop_y_max = np.max([
                        sliced_gaze_y.max(),
                        sliced_y_min.max(),
                        sliced_y_max.max()
                    ])

                # Randomly select a top-left corner
                if crop_x_min >= 0:
                    crop_x_min = np.random.uniform(0, crop_x_min)
                if crop_y_min >= 0:
                    crop_y_min = np.random.uniform(0, crop_y_min)

                # Get image size
                path = os.path.join(self.data_dir, show_name, clip,
                                    df['path'].iloc[0])
                img = Image.open(path)
                img = img.convert('RGB')
                width, height = img.size

                # Find the range of valid crop width and height starting from the (crop_x_min, crop_y_min)
                crop_width_min = crop_x_max - crop_x_min
                crop_height_min = crop_y_max - crop_y_min
                crop_width_max = width - crop_x_min
                crop_height_max = height - crop_y_min
                # Randomly select a width and a height
                crop_width = np.random.uniform(crop_width_min, crop_width_max)
                crop_height = np.random.uniform(crop_height_min,
                                                crop_height_max)
        else:
            sampled_ind = 0


        faces, images, head_channels, heatmaps, paths, gazes, imsizes, gaze_inouts = [], [], [], [], [], [], [], []
        index_tracker = -1
        for i, row in df.iterrows():
            index_tracker = index_tracker + 1
            if not self.test:
                if index_tracker < sampled_ind or index_tracker >= (
                        sampled_ind + self.seq_len_limit):
                    continue

            face_x1 = row['xmin']  # note: Already in image coordinates
            face_y1 = row['ymin']  # note: Already in image coordinates
            face_x2 = row['xmax']  # note: Already in image coordinates
            face_y2 = row['ymax']  # note: Already in image coordinates
            gaze_x = row['gazex']  # note: Already in image coordinates
            gaze_y = row['gazey']  # note: Already in image coordinates

            impath = os.path.join(self.data_dir, show_name, clip, row['path'])
            img = Image.open(impath)
            img = img.convert('RGB')

            width, height = img.size
            imsize = torch.FloatTensor([width, height])
            # imsizes.append(imsize)

            face_x1, face_y1, face_x2, face_y2 = map(
                float, [face_x1, face_y1, face_x2, face_y2])
            gaze_x, gaze_y = map(float, [gaze_x, gaze_y])
            if gaze_x == -1 and gaze_y == -1:
                gaze_inside = False
            else:
                if gaze_x < 0:  # move a gaze point that was slightly outside the image back in
                    gaze_x = 0
                if gaze_y < 0:
                    gaze_y = 0
                gaze_inside = True

            if not self.test:
                ## data augmentation
                # Jitter (expansion-only) bounding box size.
                if cond_jitter < 0.5:
                    k = cond_jitter * 0.1
                    face_x1 -= k * abs(face_x2 - face_x1)
                    face_y1 -= k * abs(face_y2 - face_y1)
                    face_x2 += k * abs(face_x2 - face_x1)
                    face_y2 += k * abs(face_y2 - face_y1)
                    face_x1 = np.clip(face_x1, 0, width)
                    face_x2 = np.clip(face_x2, 0, width)
                    face_y1 = np.clip(face_y1, 0, height)
                    face_y2 = np.clip(face_y2, 0, height)

                # Random Crop
                if cond_crop < 0.5:
                    # Crop it
                    img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                                  crop_width)

                    # Record the crop's (x, y) offset
                    offset_x, offset_y = crop_x_min, crop_y_min

                    # convert coordinates into the cropped frame
                    face_x1, face_y1, face_x2, face_y2 = face_x1 - offset_x, face_y1 - offset_y, face_x2 - offset_x, face_y2 - offset_y
                    if gaze_inside:
                        gaze_x, gaze_y = (gaze_x- offset_x), \
                                         (gaze_y - offset_y)
                    else:
                        gaze_x = -1
                        gaze_y = -1

                    width, height = crop_width, crop_height

                # Flip?
                if cond_flip < 0.5:
                    img = img.transpose(Image.FLIP_LEFT_RIGHT)
                    x_max_2 = width - face_x1
                    x_min_2 = width - face_x2
                    face_x2 = x_max_2
                    face_x1 = x_min_2
                    if gaze_x != -1 and gaze_y != -1:
                        gaze_x = width - gaze_x

                # Random color change
                if cond_color < 0.5:
                    img = TF.adjust_brightness(img, brightness_factor=n1)
                    img = TF.adjust_contrast(img, contrast_factor=n2)
                    img = TF.adjust_saturation(img, saturation_factor=n3)

            # Face crop
            face = img.copy().crop(
                (int(face_x1), int(face_y1), int(face_x2), int(face_y2)))

            # Head channel image
            head_channel = imutils.get_head_box_channel(
                face_x1,
                face_y1,
                face_x2,
                face_y2,
                width,
                height,
                resolution=self.input_size,
                coordconv=False).unsqueeze(0)
            if self.transform is not None:
                img = self.transform(img)
                face = self.transform(face)

            # Deconv output
            if gaze_inside:
                gaze_x /= float(width)  # fractional gaze
                gaze_y /= float(height)
                gaze_heatmap = torch.zeros(
                    self.output_size,
                    self.output_size)  # set the size of the output
                gaze_map = imutils.draw_labelmap(
                    gaze_heatmap,
                    [gaze_x * self.output_size, gaze_y * self.output_size],
                    3,
                    type='Gaussian')
                gazes.append(torch.FloatTensor([gaze_x, gaze_y]))
            else:
                gaze_map = torch.zeros(self.output_size, self.output_size)
                gazes.append(torch.FloatTensor([-1, -1]))
            faces.append(face)
            images.append(img)
            head_channels.append(head_channel)
            heatmaps.append(gaze_map)
            gaze_inouts.append(torch.FloatTensor([int(gaze_inside)]))

        if self.imshow:
            for i in range(len(faces)):
                fig = plt.figure(111)
                img = 255 - imutils.unnorm(images[i].numpy()) * 255
                img = np.clip(img, 0, 255)
                plt.imshow(np.transpose(img, (1, 2, 0)))
                plt.imshow(imresize(heatmaps[i],
                                    (self.input_size, self.input_size)),
                           cmap='jet',
                           alpha=0.3)
                plt.imshow(imresize(1 - head_channels[i].squeeze(0),
                                    (self.input_size, self.input_size)),
                           alpha=0.2)
                plt.savefig(
                    os.path.join('debug',
                                 'viz_%d_inout=%d.png' % (i, gaze_inouts[i])))
                plt.close('all')

        faces = torch.stack(faces)
        images = torch.stack(images)
        head_channels = torch.stack(head_channels)
        heatmaps = torch.stack(heatmaps)
        gazes = torch.stack(gazes)
        gaze_inouts = torch.stack(gaze_inouts)
        # imsizes = torch.stack(imsizes)
        # print(faces.shape, images.shape, head_channels.shape, heatmaps.shape)

        if self.test:
            return images, faces, head_channels, heatmaps, gazes, gaze_inouts
        else:  # train
            return images, faces, head_channels, heatmaps, gaze_inouts
Example #8
 def process_one(self, img):
     i, j, h, w = self.get_params(img, self.size)
     out = F.crop(img, i, j, h, w)
     if random.random() < 0.5:
         out = F.hflip(out)
     return self.transformer(out)
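A self-contained sketch of the class this method appears to belong to (the class name and the transformer default are assumptions):

import random

import torchvision.transforms as transforms
import torchvision.transforms.functional as F


class RandomCropFlip:
    def __init__(self, size=(224, 224)):
        self.size = size
        self.get_params = transforms.RandomCrop.get_params
        self.transformer = transforms.ToTensor()   # downstream tensor conversion

    def process_one(self, img):
        i, j, h, w = self.get_params(img, self.size)
        out = F.crop(img, i, j, h, w)
        if random.random() < 0.5:
            out = F.hflip(out)
        return self.transformer(out)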
Example #9
    def bbox_augmentations(
            self,
            img,
            bboxes: List[List[int]],
            labels: List[int],
            size=(256, 256),
            scale=(0.08, 1.0),
            ratio=(0.75, 4 / 3),
            interpolation=Image.BILINEAR,
    ):
        """
        Arguments:
            img: PIL Image
            bboxes: list of bounding boxes [[top, left, bot, right], ...]
            size: image size to convert to
            scale: range of size of the origin size cropped
            ratio: range of aspect ratio of the origin aspect ratio cropped
        """
        top, left, bot, right = torchvision.transforms.RandomResizedCrop.get_params(
            img, scale, ratio)
        print("PARAMS =", (top, left, bot, right))
        width, height = img.size
        # print(f"h = {height} w={width}")
        # top, left, bot, right = (0,0, height, width)
        # print("top, left, bot, right",top, left, bot, right)
        # does crop then resize
        # separated to make operations explicit
        img = F.crop(img, top, left, bot, right)
        img = F.resize(img, size=size, interpolation=interpolation)
        # equivalent one line expression
        # img = F.resized_crop(img, top, left, bot, right, size=size, interpolation=interpolation)

        final_boxlist = []
        final_labels = []
        # Assumes box list is [[top, left, bot, right], ...]
        for box, label in zip(bboxes, labels):
            # boxtop, boxleft, boxbot, boxright = box
            boxleft, boxtop, boxright, boxbot = box

            # remove cropped boxes
            if ((left >= boxright) or (top >= boxbot)
                    or ((top + bot + 1) <= boxtop)
                    or ((left + right + 1) <= boxleft)):
                # print("continuing")
                continue

            # cropping
            if top > boxtop:
                boxtop = 0
            else:
                boxtop -= top
            if left > boxleft:
                boxleft = 0
            else:
                boxleft -= left
            if (top + bot) <= boxbot:
                boxbot = bot
            else:
                boxbot -= top
            if (left + right) <= boxright:
                boxright = right
            else:
                boxright -= left

            # resizing
            # to match the same behavior of functional.resize
            boxtop, boxleft, boxbot, boxright = _resize_box(
                size, (bot, right), (boxtop, boxleft, boxbot, boxright))

            # check if zero area
            if ((boxtop - boxbot) * (boxright - boxleft)) == 0:
                continue

            # Point ordering should match https://pytorch.org/docs/stable/_modules/torchvision/models/detection/faster_rcnn.html#fasterrcnn_resnet50_fpn
            final_boxlist.append([boxleft, boxtop, boxright, boxbot])
            final_labels.append(label)

        return img, final_boxlist, final_labels
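The _resize_box helper used above is not shown; a hypothetical implementation, assuming it rescales (top, left, bot, right) coordinates from the cropped size to the output size the same way F.resize rescales the pixels:

def _resize_box(target_size, source_size, box):
    """Hypothetical helper: rescale a (top, left, bot, right) box
    from source_size=(h, w) to target_size=(h, w)."""
    target_h, target_w = target_size
    source_h, source_w = source_size
    top, left, bot, right = box
    h_scale = target_h / source_h
    w_scale = target_w / source_w
    return (int(top * h_scale), int(left * w_scale),
            int(bot * h_scale), int(right * w_scale))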
Example #10
def augment_image(img, mask, box, net_input_shape, random_scale,
                  random_displacement, random_flip):
    """
    img, mask: numpy array of (height, width, 3)
    box: a [x,y,w,h] box at the center
    net_input_shape: input height and width of network (height, width)
    """
    if len(img.shape) != 3 or img.shape[2] != 3:
        raise ValueError('Expecting image shape to be [H,W,3].')
    if not (len(mask.shape) == 3 and mask.shape[2] == 1):
        raise ValueError('Expecting mask shape to be [H,W,1].')

    img_height = img.shape[0]
    img_width = img.shape[1]
    net_height = net_input_shape[0]
    net_width = net_input_shape[1]

    # random scale
    if random_scale is not None:
        scale_factor = np.random.uniform(random_scale[0], random_scale[1])
    else:
        scale_factor = 1.0
    scaled_height = int(net_height * scale_factor)
    scaled_width = int(net_width * scale_factor)

    # randomly displace a little
    if random_displacement is not None:
        displacement_x = int(
            np.random.uniform(-random_displacement, random_displacement) *
            scaled_width)
        displacement_y = int(
            np.random.uniform(-random_displacement, random_displacement) *
            scaled_height)
    else:
        displacement_x = 0
        displacement_y = 0

    x, y, w, h = box
    x_center, y_center = x + w / 2, y + h / 2
    crop_box = box_utils.int_box([
        x_center - net_width / 2, y_center - net_height / 2, net_width,
        net_height
    ])
    crop_box = box_utils.int_box(box_utils.rescale_box(crop_box, scale_factor))
    crop_box = box_utils.shift_box(crop_box, displacement_x, displacement_y,
                                   img_width, img_height)

    # randomly rotate by 90k degrees, k = 0,1,2,3
    random_hflip = random.randrange(2)
    random_vflip = random.randrange(2)

    # Begin transforms
    # numpy to Torch tensor
    img_aug = torch.from_numpy(img)
    mask_aug = torch.from_numpy(mask)

    # (H,W,C) -> (C,H,W)
    img_aug = img_aug.permute(2, 0, 1)
    mask_aug = mask_aug.permute(2, 0, 1)

    # Crop out the randomly scaled / displaced box
    x, y, w, h = crop_box
    img_aug = ttf.crop(img_aug, top=y, left=x, height=h, width=w)
    mask_aug = ttf.crop(mask_aug, top=y, left=x, height=h, width=w)

    # Resize to network size
    img_aug = ttf.resize(img_aug, [net_height, net_width])
    mask_aug = ttf.resize(mask_aug, [net_height, net_width])

    # Flip if needed
    if random_flip:
        if random_hflip > 0:
            img_aug = ttf.hflip(img_aug)
            mask_aug = ttf.hflip(mask_aug)
        if random_vflip > 0:
            img_aug = ttf.vflip(img_aug)
            mask_aug = ttf.vflip(mask_aug)

    return img_aug, mask_aug
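The box_utils helpers are external to torchvision and not shown here; a rough, hypothetical reading of what they would need to do for this snippet to work:

def int_box(box):
    """Round an [x, y, w, h] box to integer pixel values."""
    return [int(round(v)) for v in box]


def rescale_box(box, factor):
    """Scale an [x, y, w, h] box about its centre by `factor`."""
    x, y, w, h = box
    cx, cy = x + w / 2, y + h / 2
    return [cx - w * factor / 2, cy - h * factor / 2, w * factor, h * factor]


def shift_box(box, dx, dy, img_w, img_h):
    """Displace a box and clamp it so it stays inside the image bounds."""
    x, y, w, h = box
    x = min(max(x + dx, 0), max(img_w - w, 0))
    y = min(max(y + dy, 0), max(img_h - h, 0))
    return [x, y, w, h]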
Example #11
    def __getitem__(self, index):
        """
        Return one sample and its label and extra information that we need later.

        :param index: int, the index of the sample within the whole dataset.
        :return: sample: pytorch.tensor of size (1, C, H, W) and datatype torch.FloatTensor. Where C is the number of
                 color channels (=3), and H is the height of the patch, and W is its width.
                 mask: PIL.Image.Image, the mask of the regions of interest.
                 label: int, the label of the sample.
        """
        # Force seeding: a workaround to preserve reproducibility when using different numbers of workers.
        # Each sample has its own seed.
        reproducibility.force_seed(self.seeds[index])

        if self.set_for_eval:
            error_msg = "Something wrong. You didn't ask to set the data ready for evaluation, but here we are " \
                        ".... [NOT OK]"
            assert self.inputs_ready is not None and self.labels_ready is not None, error_msg
            img = self.inputs_ready[index]
            mask = self.masks_ready[index]
            target = self.labels_ready[index]

            return img, mask, target

        if self.do_not_save_samples:
            img, mask, target = self.load_sample_i(index)
        else:
            assert self.preloaded, "Sorry, you need to preload the data first .... [NOT OK]"
            img, mask, target = self.images[index], self.masks[
                index], self.labels[index]
        # Upscale on the fly. This may add some extra time, but we do not want to keep upscaled images in memory:
        # they take a lot of space, especially for large datasets. So, as a compromise, upscale only when necessary.
        # Check whether the image needs upscaling. Useful for Caltech-UCSD-Birds-200-2011.
        if self.up_scale_small_dim_to is not None:
            w, h = img.size
            w_up, h_up = self.get_upscaled_dims(w, h,
                                                self.up_scale_small_dim_to)
            img = img.resize((w_up, h_up), resample=PIL.Image.BILINEAR)

        # Upscale the image: only for Caltech-UCSD-Birds-200-2011.

        if self.randomCropper:  # training only. Do not crop for evaluation.
            # Padding.
            if self.padding_size:
                w, h = img.size
                ph, pw = self.padding_size
                padding = (int(pw * w), int(ph * h))
                img = TF.pad(img,
                             padding=padding,
                             padding_mode=self.padding_mode)
                mask = TF.pad(
                    mask, padding=padding,
                    padding_mode=self.padding_mode)  # just for tracking.

            img, (i, j, h, w) = self.randomCropper(img)
            # print("Dadaloader Index {} i  {}  j {} seed {}".format(index, i, j, self.seeds[index]))
            # crop the mask
            mask = TF.crop(
                mask, i, j, h,
                w)  # just for tracking. Not used for actual training.

        # Pad the image to be div. by 32 in both sides.
        if self.force_div_32:
            w, h = img.size
            pad_left, pad_right = self.get_padding(w, 32)
            pad_top, pad_bottom = self.get_padding(h, 32)
            padding = (pad_left, pad_top, pad_right, pad_bottom)
            img = TF.pad(img, padding=padding, padding_mode="reflect")
            # This is not necessary in training nor in test. It may be necessary during training if your patch size
            # is not dividable by 32 and you want to make it dividable by 32.
            # We are going to comment this.
            # if not self.set_for_eval_backup:  # we want to keep the mask intact for evaluation.
            # just for tracking. Not used for training.
            #    mask = TF.pad(mask, padding=padding, padding_mode="reflect")

        if self.transform_img:  # just for training: do not transform the mask (since it is not used).
            img = self.transform_img(img)

        if self.transform_tensor:  # just for training: do not transform the mask (since it is not used).
            img = self.transform_tensor(img)

        # Prepare the mask to be used on GPU to compute Dice index.
        mask = np.array(mask, dtype=np.float32) / 255.  # full of 0 and 1.
        mask = self.to_tensor(np.expand_dims(
            mask, axis=-1))  # make the mask shape (h, w, 1).

        return img, mask, target
Example #12
    def __getitem__(self, index):
        sample_path = self.data_list[index].split()
        img = Image.open(os.path.join(
            self.dir_imgs, sample_path[self.idx_img])).convert("RGB")
        lidar = None
        depth = None
        item = []

        if self.mode == 'train':
            depth = read_depth(
                os.path.join(self.dir_imgs, sample_path[self.idx_depth]),
                self.dataset_name, self.max_depth)
            if self.lidar_exist:
                lidar = read_depth(
                    os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                    self.dataset_name, self.max_depth)
            else:
                if self.gen_sparse_online:
                    lidar = self.to_sparse(image=depth)
                else:
                    lidar = self.lidar_persudo
            # show(depth), show(lidar), show(img)

            # Augmentation
            rsz_size = img.size[::-1] if self.resize_size is None else self.resize_size  # h*w
            crp_size = img.size[::-1] if self.crop_size is None else self.crop_size  # h*w
            depth_rsz = transforms.Compose(
                [transforms.ToPILImage(),
                 transforms.Resize(rsz_size, 0)])  # nearest-neighbour (no interpolation)
            img = transforms.Resize(rsz_size)(img)  # default Resize uses bilinear interpolation
            depth = depth_rsz(depth)
            lidar = depth_rsz(lidar)

            # the top of KITTI frames has no depth values, so crop it off first
            if self.dataset_name == 'kitti':
                img = F.crop(img, rsz_size[0] - crp_size[0], 0, crp_size[0],
                             rsz_size[1])
                depth = F.crop(depth, rsz_size[0] - crp_size[0], 0,
                               crp_size[0], rsz_size[1])
                lidar = F.crop(lidar, rsz_size[0] - crp_size[0], 0,
                               crp_size[0], rsz_size[1])

            img = np.asarray(img, dtype=np.float32) / 255.0
            depth = np.asarray(depth)
            lidar = np.asarray(lidar)
            # li = cv2.resize(lidar.astype(np.uint16), rsz_size[::-1], 0)  # OpenCV's resize would inflate the proportion of sparse points
            if self.aug:
                img, depth, lidar = self.augment_3(img, depth, lidar, crp_size,
                                                   self.degree)

            # Normalization
            img = self.img_process(img.copy())
            depth = self.to_tensor(depth.copy())
            lidar = self.to_tensor(lidar.copy())
            item = [img, lidar, depth]
        elif self.mode == 'val':
            depth = read_depth(
                os.path.join(self.dir_imgs, sample_path[self.idx_depth]),
                self.dataset_name, self.max_depth)
            if self.lidar_exist:
                lidar = read_depth(
                    os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                    self.dataset_name, self.max_depth)
            else:
                if self.gen_sparse_online:
                    lidar = self.to_sparse(image=depth)
                else:
                    lidar = self.lidar_persudo
            img = np.asarray(img, dtype=np.float32) / 255.0

            # pad to a size the network can accept
            h_ori, w_ori = img.shape[0], img.shape[1]
            h_pad = int(np.ceil(h_ori / self.mul_times) * self.mul_times)
            w_pad = int(np.ceil(w_ori / self.mul_times) * self.mul_times)
            img = iaa.CenterPadToFixedSize(height=h_pad,
                                           width=w_pad)(image=img)
            lidar = iaa.CenterPadToFixedSize(height=h_pad,
                                             width=w_pad)(image=lidar)
            depth = iaa.CenterPadToFixedSize(height=h_pad,
                                             width=w_pad)(image=depth)
            lidar = lidar.astype(np.float32)
            depth = depth.astype(np.float32)

            # Normalization
            img = self.img_process(img.copy())
            depth = self.to_tensor(depth.copy())
            lidar = self.to_tensor(lidar.copy())
            item = [img, lidar, depth]
        elif self.mode == 'test':
            if self.lidar_exist:
                lidar = read_depth(
                    os.path.join(self.dir_imgs, sample_path[self.idx_lidar]),
                    self.dataset_name, self.max_depth)
            else:
                lidar = self.lidar_persudo
            img = np.asarray(img, dtype=np.float32) / 255.0

            # pad to a size the network can accept
            h_ori, w_ori = img.shape[0], img.shape[1]
            h_pad = int(np.ceil(h_ori / self.mul_times) * self.mul_times)
            w_pad = int(np.ceil(w_ori / self.mul_times) * self.mul_times)
            img = iaa.CenterPadToFixedSize(height=h_pad,
                                           width=w_pad)(image=img)
            lidar = iaa.CenterPadToFixedSize(height=h_pad,
                                             width=w_pad)(image=lidar)
            lidar = lidar.astype(np.float32)

            # Normalization
            img = self.img_process(img.copy())
            lidar = self.to_tensor(lidar.copy())
            item = [img, lidar, lidar]
        return item
Example #13
    def __getitem__(self, idx):
        """
        Function to get a sample from the dataset. First both RGB and Semantic images are read in PIL format. Then
        transformations are applied from PIL to Numpy arrays to Tensors.

        For regular usage:
            - Images should be output with dimensions (3, W, H)
            - Semantic Images should be output with dimensions (1, W, H)

        In the case that 10-crops are used:
            - Images should be output with dimensions (10, 3, W, H)
            - Semantic Images should be output with dimensions (10, 1, W, H)

        :param idx: Index
        :return: Dictionary containing {RGB image, semantic segmentation mask, scene category index}
        """

        # Get RGB image path and load it
        img_name = os.path.join(self.image_dir, self.set, self.labels[idx],
                                self.filenames[idx])
        img = Image.open(img_name)

        # Convert it to RGB if gray-scale
        if img.mode != "RGB":
            img = img.convert("RGB")

        # Load semantic segmentation mask
        filename_sem = self.filenames[idx][0:self.filenames[idx].find('.jpg')]
        sem_name = os.path.join(self.image_dir, "noisy_annotations_RGB",
                                self.set, self.labels[idx],
                                (filename_sem + ".png"))
        sem = Image.open(sem_name)

        # Load semantic segmentation scores
        filename_scores = self.filenames[idx][0:self.filenames[idx].find('.jpg')]
        sem_score_name = os.path.join(self.image_dir, "noisy_scores_RGB",
                                      self.set, self.labels[idx],
                                      (filename_scores + ".png"))
        semScore = Image.open(sem_score_name)

        # Apply transformations depending on the set (train, val)
        if self.set == "train":
            # Define Random crop. If image is smaller resize first.
            bilinearResize_trans = transforms.Resize(self.resizeSize)
            nearestResize_trans = transforms.Resize(
                self.resizeSize, interpolation=Image.NEAREST)

            img = bilinearResize_trans(img)
            sem = nearestResize_trans(sem)
            semScore = bilinearResize_trans(semScore)

            # Extract Random Crop parameters
            i, j, h, w = transforms.RandomCrop.get_params(
                img, output_size=(self.outputSize, self.outputSize))
            # Apply Random Crop parameters
            img = TF.crop(img, i, j, h, w)
            sem = TF.crop(sem, i, j, h, w)
            semScore = TF.crop(semScore, i, j, h, w)

            # Random horizontal flipping
            if random.random() > 0.5:
                img = TF.hflip(img)
                sem = TF.hflip(sem)
                semScore = TF.hflip(semScore)

            # Apply transformations from ImgAug library
            img = np.asarray(img)
            sem = np.asarray(sem)
            semScore = np.asarray(semScore)

            img = np.squeeze(
                self.seq.augment_images(np.expand_dims(img, axis=0)))
            if self.SemRGB:
                sem = np.squeeze(
                    self.seq_sem.augment_images(np.expand_dims(sem, 0)))
                semScore = np.squeeze(
                    self.seq_sem.augment_images(np.expand_dims(semScore, 0)))
            else:
                sem = np.squeeze(
                    self.seq_sem.augment_images(
                        np.expand_dims(np.expand_dims(sem, 0), 3)))
                semScore = np.squeeze(
                    self.seq_sem.augment_images(
                        np.expand_dims(np.expand_dims(semScore, 0), 3)))

            # Apply not random transforms. To tensor and normalization for RGB. To tensor for semantic segmentation.
            img = self.train_transforms_img(img)
            sem = self.train_transforms_sem(sem)
            semScore = self.train_transforms_scores(semScore)
        else:
            img = self.val_transforms_img(img)
            sem = self.val_transforms_sem(sem)
            semScore = self.val_transforms_scores(semScore)

        # Final control statements
        if not self.TenCrop:
            if not self.SemRGB:
                assert img.shape[0] == 3 and img.shape[
                    1] == self.outputSize and img.shape[2] == self.outputSize
                assert sem.shape[0] == 1 and sem.shape[
                    1] == self.outputSize and sem.shape[2] == self.outputSize
                assert semScore.shape[0] == 1 and semScore.shape[
                    1] == self.outputSize and semScore.shape[
                        2] == self.outputSize
            else:
                assert img.shape[0] == 3 and img.shape[
                    1] == self.outputSize and img.shape[2] == self.outputSize
                assert sem.shape[0] == 3 and sem.shape[
                    1] == self.outputSize and sem.shape[2] == self.outputSize
                assert semScore.shape[0] == 3 and semScore.shape[
                    1] == self.outputSize and semScore.shape[
                        2] == self.outputSize
        else:
            if not self.SemRGB:
                assert img.shape[0] == 10 and img.shape[
                    2] == self.outputSize and img.shape[3] == self.outputSize
                assert sem.shape[0] == 10 and sem.shape[
                    2] == self.outputSize and sem.shape[3] == self.outputSize
                assert semScore.shape[0] == 10 and semScore.shape[
                    2] == self.outputSize and semScore.shape[
                        3] == self.outputSize
            else:
                assert img.shape[0] == 10 and img.shape[
                    2] == self.outputSize and img.shape[3] == self.outputSize
                assert sem.shape[0] == 10 and sem.shape[
                    2] == self.outputSize and sem.shape[3] == self.outputSize
                assert semScore.shape[0] == 10 and semScore.shape[
                    2] == self.outputSize and semScore.shape[
                        3] == self.outputSize

        # Create dictionary
        self.sample = {
            'Image': img,
            'Semantic': sem,
            'Semantic Scores': semScore,
            'Scene Index': self.classes.index(self.labels[idx])
        }

        return self.sample
Example #14
 def torchvision_transform(self, img):
     img = torchvision.crop(img, top=0, left=0, height=64, width=64)
     return torchvision.resize(img, (512, 512))
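In this benchmark-style snippet, torchvision presumably aliases the functional module rather than the top-level package; a self-contained equivalent under that assumption:

from PIL import Image
import torchvision.transforms.functional as torchvision

img = Image.new('RGB', (128, 128))
out = torchvision.resize(torchvision.crop(img, top=0, left=0, height=64, width=64), (512, 512))
print(out.size)   # (512, 512)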
Example #15
    def __getitem__(self, index):
        if self.test:
            g = self.X_test.get_group(self.keys[index])
            cont_gaze = []
            for i, row in g.iterrows():
                path = row['path']
                x_min = row['bbox_x_min']
                y_min = row['bbox_y_min']
                x_max = row['bbox_x_max']
                y_max = row['bbox_y_max']
                eye_x = row['eye_x']
                eye_y = row['eye_y']
                gaze_x = row['gaze_x']
                gaze_y = row['gaze_y']
                cont_gaze.append([gaze_x, gaze_y])  # all ground-truth gaze points are stacked up
            for j in range(len(cont_gaze), 20):
                cont_gaze.append([-1, -1])  # pad with dummy gaze to match size for batch processing
            cont_gaze = torch.FloatTensor(cont_gaze)
            gaze_inside = True  # always consider test samples as inside

        else:
            path = self.X_train.iloc[index]
            eye_x, eye_y, gaze_x, gaze_y = self.y_train.iloc[index]
            gaze_inside = True  # bool(inout)

        img = Image.open(os.path.join(self.data_dir, path))
        img = img.convert('RGB')
        width, height = img.size
        # print('gaze coords: ', type(gaze_x), type(gaze_y), gaze_x, gaze_y)
        # print('eye coords: ', type(eye_x), type(eye_y), eye_x, eye_y)
        # expand face bbox a bit
        k = 0.1
        x_min = (eye_x - 0.15) * width
        y_min = (eye_y - 0.15) * height
        x_max = (eye_x + 0.15) * width
        y_max = (eye_y + 0.15) * height
        if x_min < 0:
            x_min = 0
        if y_min < 0:
            y_min = 0
        if x_max < 0:
            x_max = 0
        if y_max < 0:
            y_max = 0
        x_min -= k * abs(x_max - x_min)
        y_min -= k * abs(y_max - y_min)
        x_max += k * abs(x_max - x_min)
        y_max += k * abs(y_max - y_min)

        # x_min = eye_x - 0.15
        # y_min = eye_y - 0.15
        # x_max = eye_x + 0.15
        # y_max = eye_y + 0.15
        # if x_min < 0:
        #     x_min = 0
        # if y_min < 0:
        #     y_min = 0
        # if x_max < 0:
        #     x_max = 0
        # if y_max < 0:
        #     y_max = 0

        # print('bbx',  [x_min, y_min, x_max, y_max])

        x_min, y_min, x_max, y_max = map(float, [x_min, y_min, x_max, y_max])
        # print(x_min, y_min, x_max, y_max)
        if self.imshow:
            img.save("origin_img.jpg")

        if self.test:
            imsize = torch.IntTensor([width, height])
        else:
            ## data augmentation

            # Jitter (expansion-only) bounding box size
            if np.random.random_sample() <= 0.5:
                k = np.random.random_sample() * 0.2
                x_min -= k * abs(x_max - x_min)
                y_min -= k * abs(y_max - y_min)
                x_max += k * abs(x_max - x_min)
                y_max += k * abs(y_max - y_min)

            # Random Crop
            if np.random.random_sample() <= 0.5:
                # Calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target
                crop_x_min = np.min([gaze_x * width, x_min, x_max])
                crop_y_min = np.min([gaze_y * height, y_min, y_max])
                crop_x_max = np.max([gaze_x * width, x_min, x_max])
                crop_y_max = np.max([gaze_y * height, y_min, y_max])

                # Randomly select a top-left corner
                if crop_x_min >= 0:
                    crop_x_min = np.random.uniform(0, crop_x_min)
                if crop_y_min >= 0:
                    crop_y_min = np.random.uniform(0, crop_y_min)

                # Find the range of valid crop width and height starting from the (crop_x_min, crop_y_min)
                crop_width_min = crop_x_max - crop_x_min
                crop_height_min = crop_y_max - crop_y_min
                crop_width_max = width - crop_x_min
                crop_height_max = height - crop_y_min
                # Randomly select a width and a height
                crop_width = np.random.uniform(crop_width_min, crop_width_max)
                crop_height = np.random.uniform(crop_height_min,
                                                crop_height_max)

                # Crop it
                img = TF.crop(img, crop_y_min, crop_x_min, crop_height,
                              crop_width)

                # Record the crop's (x, y) offset
                offset_x, offset_y = crop_x_min, crop_y_min

                # convert coordinates into the cropped frame
                x_min, y_min, x_max, y_max = x_min - offset_x, y_min - offset_y, x_max - offset_x, y_max - offset_y
                # if gaze_inside:
                gaze_x, gaze_y = (gaze_x * width - offset_x) / float(crop_width), \
                                 (gaze_y * height - offset_y) / float(crop_height)
                # else:
                #     gaze_x = -1; gaze_y = -1

                width, height = crop_width, crop_height

            # Random flip
            if np.random.random_sample() <= 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                x_max_2 = width - x_min
                x_min_2 = width - x_max
                x_max = x_max_2
                x_min = x_min_2
                gaze_x = 1 - gaze_x

            # Random color change
            if np.random.random_sample() <= 0.5:
                img = TF.adjust_brightness(img,
                                           brightness_factor=np.random.uniform(
                                               0.5, 1.5))
                img = TF.adjust_contrast(img,
                                         contrast_factor=np.random.uniform(
                                             0.5, 1.5))
                img = TF.adjust_saturation(img,
                                           saturation_factor=np.random.uniform(
                                               0, 1.5))
        # print('bbx2',  [x_min, y_min, x_max, y_max])

        head_channel = imutils.get_head_box_channel(
            x_min,
            y_min,
            x_max,
            y_max,
            width,
            height,
            resolution=self.input_size,
            coordconv=False).unsqueeze(0)

        # Crop the face
        face = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

        if self.imshow:
            img.save("img_aug.jpg")
            face.save('face_aug.jpg')

        if self.transform is not None:
            img = self.transform(img)
            face = self.transform(face)
        # print('imsize2', img.size())

        # generate the heat map used for deconv prediction
        gaze_heatmap = torch.zeros(
            self.output_size, self.output_size)  # set the size of the output
        # print([gaze_x * self.output_size, gaze_y * self.output_size])
        # print(self.output_size)
        if self.test:  # aggregated heatmap
            num_valid = 0
            for gaze_x, gaze_y in cont_gaze:
                if gaze_x != -1:
                    num_valid += 1
                    gaze_heatmap = imutils.draw_labelmap(
                        gaze_heatmap,
                        [gaze_x * self.output_size, gaze_y * self.output_size],
                        3,
                        type='Gaussian')
            gaze_heatmap /= num_valid
        else:
            # if gaze_inside:
            gaze_heatmap = imutils.draw_labelmap(
                gaze_heatmap,
                [gaze_x * self.output_size, gaze_y * self.output_size],
                3,
                type='Gaussian')

        if self.imshow:
            fig = plt.figure(111)
            img = 255 - imutils.unnorm(img.numpy()) * 255
            img = np.clip(img, 0, 255)
            plt.imshow(np.transpose(img, (1, 2, 0)))
            plt.imshow(imresize(gaze_heatmap,
                                (self.input_size, self.input_size)),
                       cmap='jet',
                       alpha=0.3)
            plt.imshow(imresize(1 - head_channel.squeeze(0),
                                (self.input_size, self.input_size)),
                       alpha=0.2)
            plt.savefig('viz_aug.png')

        if self.test:
            return img, face, head_channel, gaze_heatmap, cont_gaze, imsize, path
        else:
            return img, face, head_channel, gaze_heatmap, path, gaze_inside
Example #16
 def torchvision(self, img):
     return torchvision.crop(img, i=0, j=0, h=64, w=64)
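Same caveat as in Example #14: torchvision presumably refers to the functional module, and the i/j/h/w keywords match older torchvision releases. On current releases the equivalent call is:

from PIL import Image
import torchvision.transforms.functional as F

img = Image.new('RGB', (128, 128))
out = F.crop(img, top=0, left=0, height=64, width=64)   # same 64x64 top-left crop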
Example #17
    def __call__(self, img: Image.Image):
        if self.params is None:
            self.params = T.RandomCrop.get_params(img, self.out_size)

        return TF.crop(img, *self.params)
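A short usage sketch (the class name and constructor are assumptions): because the sampled parameters are cached, a second call reuses the same window, which keeps an image and its mask aligned.

from PIL import Image
import torchvision.transforms as T
import torchvision.transforms.functional as TF


class CachedRandomCrop:
    def __init__(self, out_size):
        self.out_size = out_size
        self.params = None

    def __call__(self, img: Image.Image):
        if self.params is None:
            self.params = T.RandomCrop.get_params(img, self.out_size)
        return TF.crop(img, *self.params)


cropper = CachedRandomCrop((128, 128))
img, mask = Image.new('RGB', (256, 256)), Image.new('L', (256, 256))
img_c, mask_c = cropper(img), cropper(mask)   # identical window for both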
Example #18
 def __call__(self, imgs):
     i, j, h, w = self.get_params(imgs[0], self.size)
     out = [F.crop(img, i, j, h, w) for img in imgs]
     if random.random() < 0.5:
         out = [F.hflip(img) for img in out]
     return [self.transformer(img) for img in out]
Example #19
 def __call__(self, img, label):
     crop_x = np.random.randint(low=0, high=33)
     crop_y = np.random.randint(low=0, high=33)
     return F.crop(img, crop_y, crop_x, 224, 224), label
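A standalone sketch of the same augmentation; the 256x256 input size is an assumption implied by the 0-32 offset range and the fixed 224x224 crop:

import numpy as np
from PIL import Image
import torchvision.transforms.functional as F

img = Image.new('RGB', (256, 256))
crop_x = np.random.randint(low=0, high=33)
crop_y = np.random.randint(low=0, high=33)
patch = F.crop(img, crop_y, crop_x, 224, 224)   # (top, left, height, width)
print(patch.size)   # (224, 224)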
Example #20
    def __call__(self, images):
        name = images["name"]
        x = images["x"]
        y = images["y"]
        if self.nuc:
            n = images["n"]

        # Check whether y contains nucleoli and restart the process if it does not
        if self.Random_Crops:
            wpix = 0
            while wpix < self.Wpix_Threshold:
                # Random crops
                i, j, h, w = transforms.RandomCrop.get_params(
                    x, output_size=self.crop_size)
                x = TF.crop(x, i, j, h, w)
                y = TF.crop(y, i, j, h, w)
                y_array = np.array(y) / 255
                if self.nuc:
                    n = TF.crop(n, i, j, h, w)
                wpix = np.sum(y_array) / (y_array.shape[0] * y_array.shape[1])
                if wpix < self.Wpix_Threshold:
                    local_keep_prob = random.random()
                    if local_keep_prob < self.Keep_Prob:
                        break
                    else:
                        x = images["x"]
                        y = images["y"]

        if self.augment:
            # Random horizontal flip
            if random.random() > 0.5:
                x = TF.hflip(x)
                y = TF.hflip(y)
                if self.nuc:
                    n = TF.hflip(n)

            # Random vertical flip
            if random.random() > 0.5:
                x = TF.vflip(x)
                y = TF.vflip(y)
                if self.nuc:
                    n = TF.vflip(n)

            # Random Rotation
            d = random.randint(-180, 180)
            x = TF.rotate(x, d)
            y = TF.rotate(y, d)
            if self.nuc:
                n = TF.rotate(n, d)

        x = TF.to_tensor(x)
        y = TF.to_tensor(y)
        if self.nuc:
            n = TF.to_tensor(n)

        if not self.nuc:
            pair = {"x": x, "y": y, "name": name}
        else:
            pair = {"x": x, "y": y, "n": n, "name": name}

        return pair
Example #21
def crop_image(image_np, coors, crop_size=224):
    image_pil = Image.fromarray(image_np)
    top_left = [max(0, x - crop_size // 2) for x in coors[::-1]]
    image_crop = crop(image_pil, *top_left, crop_size, crop_size)

    return image_crop
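A hedged usage sketch; crop is assumed to be torchvision.transforms.functional.crop, and coors an (x, y) point that the function reverses into (top, left):

import numpy as np
from PIL import Image
from torchvision.transforms.functional import crop

image_np = np.zeros((480, 640, 3), dtype=np.uint8)   # dummy frame
patch = crop_image(image_np, coors=(320, 240))       # centre of the 640x480 frame
print(patch.size)   # (224, 224)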
Example #22
# Image size: 320 x 240

letters = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p',
    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y'
]

tt = transforms.ToTensor()

improve_letters = ['q']

source_path = "./test"
dest_path = "./test2"

for letter in improve_letters:
    for i in range(20):
        image1 = Image.open(
            os.path.join(source_path, letter,
                         str(i) + "_1.png"))
        image2 = Image.open(
            os.path.join(source_path, letter,
                         str(i) + "_2.png"))
        image1 = functional.crop(image1, 0, 15, 240, 240)
        image2 = functional.crop(image2, 0, 15, 240, 240)
        utils.save_image(tt(image1),
                         os.path.join(dest_path, letter,
                                      str(i) + "_1.png"))
        utils.save_image(tt(image2),
                         os.path.join(dest_path, letter,
                                      str(i) + "_2.png"))
Example #23
    def __init__(self, dataroot, train=True, augment=True):
        self.images = []
        self.bubbles = []
        self.labels = []

        dataset = []  # contains tuples of images and associated 360 bubbles
        if train:
            file = np.loadtxt(os.path.join(dataroot, "dataset_train.txt"),
                              dtype=str,
                              skiprows=3)
        else:
            file = np.loadtxt(os.path.join(dataroot, "dataset_test.txt"),
                              dtype=str,
                              skiprows=3)

        # load image pairs and create training/validation labels
        for pair in file:
            if 'right' in pair[1]:
                self.labels.extend([i for i in range(3, 6)])
            elif 'left' in pair[1]:
                self.labels.extend([i for i in range(0, 3)])

            dataset.extend([pair for i in range(3)])

        # calculate dataset length
        self.data_len = len(dataset)

        # transformations when loading images:
        PIL = transforms.ToPILImage()
        resize = transforms.Compose([transforms.Resize(300)])
        bub_size = transforms.Resize(500)
        if augment:
            self.image_trans = transforms.Compose([
                transforms.RandomCrop(224),
                transforms.ColorJitter(brightness=0.5,
                                       contrast=0.5,
                                       saturation=0.5,
                                       hue=0.05),
                transforms.ToTensor()
            ])
            self.bubble_trans = transforms.Compose([
                transforms.ColorJitter(brightness=0.5,
                                       contrast=0.5,
                                       saturation=0.5,
                                       hue=0.05),
                transforms.ToTensor()
            ])
        else:
            self.image_trans = transforms.Compose(
                [transforms.Resize(300),
                 transforms.ToTensor()])
            self.bubble_trans = transforms.Compose([transforms.ToTensor()])

        # load images and applying initial preprocessing
        for i, (bubble, image) in enumerate(dataset):
            image = io.imread(os.path.join(dataroot, image),
                              plugin='matplotlib')
            bubble = io.imread(os.path.join(dataroot, bubble),
                               plugin='matplotlib')
            label = torch.tensor(self.labels[i])

            # cropping parameters (height, width); this assumes images of shape (2100, 2800)
            params = [1300, 1300]

            # set left pixel of the image crop depending on label
            if label == 0 or label == 3:
                width = 0
            elif label == 1 or label == 4:
                width = int((image.shape[1] - params[1]) / 2)
            elif label == 2 or label == 5:
                width = int(image.shape[1] - (params[1] + 1))

            # set height of image crop
            height = int((image.shape[0] - params[0]) / 2)

            # pre-process image files
            image = PIL(image)
            image = TF.crop(image, height, width, params[0], params[1])
            image = resize(image)
            self.images.append(image)

            # preprocess bubble and add the array
            bubble = PIL(bubble)
            bubble = bub_size(bubble)
            self.bubbles.append(bubble)
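A minimal __getitem__/__len__ sketch for this dataset (not in the source); it assumes the preloaded PIL images and bubbles are transformed lazily per sample with the transforms defined above:

    def __getitem__(self, index):
        # apply the augmenting (or plain) transforms defined in __init__
        image = self.image_trans(self.images[index])
        bubble = self.bubble_trans(self.bubbles[index])
        label = torch.tensor(self.labels[index])
        return image, bubble, label

    def __len__(self):
        return self.data_len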
Beispiel #24
0
    def __call__(self, sample):
        image, label, label_c, label_m, label_gt = \
                sample['image'], sample['label'], sample['label_c'], sample['label_m'], sample['label_gt']
        if self.precise_contour:
            pil_masks = sample['pil_masks']
        weight = None

        if self.augment:
            if self.color_equalize and random.random() > 0.5:
                image = clahe(image)

            # perform RandomResize() or just enlarge for image size < model input size
            if random.random() > 0.5:
                new_size = int(
                    random.uniform(self.min_scale, self.max_scale) *
                    np.min(image.size))
            else:
                new_size = int(np.min(image.size))
            if new_size < np.max(self.size):  # make it viable for cropping
                new_size = int(np.max(self.size))
            image, label, label_c, label_m = [
                tx.resize(x, new_size)
                for x in (image, label, label_c, label_m)
            ]
            if self.precise_contour:
                # regenerate all resized masks (bilinear interpolation) and compose them afterwards
                pil_masks = [tx.resize(m, new_size) for m in pil_masks]
                label_gt = compose_mask(pil_masks, pil=True)
            else:
                # label_gt use NEAREST instead of BILINEAR (default) to avoid polluting instance labels after augmentation
                label_gt = tx.resize(label_gt,
                                     new_size,
                                     interpolation=Image.NEAREST)

            # perform RandomCrop()
            i, j, h, w = transforms.RandomCrop.get_params(image, self.size)
            image, label, label_c, label_m, label_gt = [
                tx.crop(x, i, j, h, w)
                for x in (image, label, label_c, label_m, label_gt)
            ]
            if self.precise_contour:
                pil_masks = [tx.crop(m, i, j, h, w) for m in pil_masks]

            # Note: RandomResizedCrop() is popularly used to train the Inception networks, but might not be the best choice for segmentation?
            # # perform RandomResizedCrop()
            # i, j, h, w = transforms.RandomResizedCrop.get_params(
            #     image,
            #     scale=(0.5, 1.0),
            #     ratio=(3. / 4., 4. / 3.)
            # )
            # # label_gt use NEAREST instead of BILINEAR (default) to avoid polluting instance labels after augmentation
            # image, label, label_c, label_m = [tx.resized_crop(x, i, j, h, w, self.size) for x in (image, label, label_c, label_m)]
            # label_gt = tx.resized_crop(label_gt, i, j, h, w, self.size, interpolation=Image.NEAREST)

            # perform Elastic Distortion
            if self.elastic_distortion and random.random() > 0.75:
                indices = ElasticDistortion.get_params(image)
                image, label, label_c, label_m = [
                    ElasticDistortion.transform(x, indices)
                    for x in (image, label, label_c, label_m)
                ]
                if self.precise_contour:
                    pil_masks = [
                        ElasticDistortion.transform(m, indices)
                        for m in pil_masks
                    ]
                    label_gt = compose_mask(pil_masks, pil=True)
                else:
                    label_gt = ElasticDistortion.transform(
                        label_gt, indices, spline_order=0
                    )  # spline_order=0 to avoid polluting instance labels

            # perform RandomHorizontalFlip()
            if random.random() > 0.5:
                image, label, label_c, label_m, label_gt = [
                    tx.hflip(x)
                    for x in (image, label, label_c, label_m, label_gt)
                ]

            # perform RandomVerticalFlip()
            if random.random() > 0.5:
                image, label, label_c, label_m, label_gt = [
                    tx.vflip(x)
                    for x in (image, label, label_c, label_m, label_gt)
                ]

            # perform Random Rotation (0, 90, 180, and 270 degrees)
            random_degree = random.randint(0, 3) * 90
            image, label, label_c, label_m, label_gt = [
                tx.rotate(x, random_degree)
                for x in (image, label, label_c, label_m, label_gt)
            ]

            # perform random color invert, assuming 3 channels (rgb) images
            if self.color_invert and random.random() > 0.5:
                image = ImageOps.invert(image)

            # perform ColorJitter()
            if self.color_jitter and random.random() > 0.5:
                color = transforms.ColorJitter.get_params(0.5, 0.5, 0.5, 0.25)
                image = color(image)

        elif self.resize:  # resize down image
            image, label, label_c, label_m = [
                tx.resize(x, self.size)
                for x in (image, label, label_c, label_m)
            ]
            if self.precise_contour:
                pil_masks = [tx.resize(m, self.size) for m in pil_masks]
                label_gt = compose_mask(pil_masks, pil=True)
            else:
                label_gt = tx.resize(label_gt,
                                     self.size,
                                     interpolation=Image.NEAREST)

        # replace label_c / label_m with 'thinner' contours based on the augmented/transformed mask
        if self.detect_contour:
            label_c, label_m, weight = get_instances_contour_interior(
                np.asarray(label_gt))
            label_c, label_m = Image.fromarray(label_c), Image.fromarray(
                label_m)

        # The resize algorithm may introduce anti-aliased edges (i.e. non-binary values),
        # so map every pixel back to 0 or 255
        if self.label_binary:
            label, label_c, label_m = [
                x.point(lambda p, threshold=100: 255 if p > threshold else 0)
                for x in (label, label_c, label_m)
            ]
            # For contour-only training, use the merged instance contour label (label_c);
            # the side effect is losing the instance count information
            if self.only_contour:
                label_gt = label_c

        # perform ToTensor()
        if self.tensor:
            image, label, label_c, label_m, label_gt = \
                    [tx.to_tensor(x) for x in (image, label, label_c, label_m, label_gt)]
            # perform Normalize()
            image = tx.normalize(image, self.mean, self.std)

        # prepare a shallow copy of the composed data to avoid corrupting the cached sample
        x = sample.copy()
        x['image'], x['label'], x['label_c'], x['label_m'], x['label_gt'] = \
                image, label, label_c, label_m, label_gt

        if self.weight_map and weight is not None:
            weight = np.expand_dims(weight, 0)
            x['weight'] = torch.from_numpy(weight)

        if 'pil_masks' in x:
            del x['pil_masks']

        return x
Beispiel #25
0
 def __call__(self, img):
     i, j, h, w = self.params
     return F.crop(img, i, j, h, w)
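A minimal sketch of the enclosing transform (the class itself is not shown in the source), assuming self.params holds one pre-sampled (top, left, height, width) window and F is torchvision.transforms.functional; sampling the window once makes it easy to crop an image and its mask identically:

import torchvision.transforms as transforms
import torchvision.transforms.functional as F

class FixedCrop:  # hypothetical name
    def __init__(self, params):
        self.params = params  # (top, left, height, width)

    def __call__(self, img):
        i, j, h, w = self.params
        return F.crop(img, i, j, h, w)

# params = transforms.RandomCrop.get_params(image, output_size=(224, 224))
# crop_pair = FixedCrop(params)
# image, mask = crop_pair(image), crop_pair(mask)  # same window for both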
Beispiel #26
0
    def __call__(self, data):
        hr, lr = data
        x, y, h, w = self.setting_window(hr, self.crop_size)

        return F.crop(hr, x, y, h, w), F.crop(lr, x, y, h, w)
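The setting_window helper is not shown in the source; a minimal sketch of what it might look like (an assumption, with transforms taken to be torchvision.transforms), reusing RandomCrop's parameter sampling so the HR/LR pair is cropped with one shared window, which only works if both images have the same spatial size:

    def setting_window(self, img, crop_size):
        # sample one (top, left, height, width) window to reuse for both images
        size = crop_size if isinstance(crop_size, (tuple, list)) else (crop_size, crop_size)
        return transforms.RandomCrop.get_params(img, output_size=size)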
Beispiel #27
0
def get_indexes(mask_height, boxes_params, batch_size, steps):
    pi = torch.as_tensor(np.pi)
    batch_edges = []
    batch_edges_left = []
    batch_edges_right = []
    batch_edges_top = []
    batch_edges_bottom = []
    for i in range(batch_size):
        img = mask_height[i]
        edges = []
        left_edges = []
        right_edges = []
        top_edges = []
        bottom_edges = []
        for j in range(steps):
            y = boxes_params[i][j][0]
            x = boxes_params[i][j][1]
            angle = boxes_params[i][j][2]
            width = boxes_params[i][j][3]
            length = boxes_params[i][j][4]
            # If the width is too small, treat this box as infeasible right away
            if width < 5:
                edges.append(0)
                left_edges.append(0)
                right_edges.append(0)
                top_edges.append(0)
                bottom_edges.append(0)
                continue
            top = int(y - length / 2)
            left = int(x - width / 2)
            rt_angle = -float((angle / pi * 180))

            rectified_img = VisionF.rotate(img=img.view(1, 1, 300, 300),
                                           angle=rt_angle,
                                           center=(x, y))

            crop_img = VisionF.crop(rectified_img, top, left, int(length),
                                    int(width))

            resized_img = VisionF.resize(
                crop_img, [50, 100]).squeeze().cpu().data.numpy()

            # Get the width of each edge of the image
            edge, edge_left, edge_right, edge_top, edge_bottom = get_edge(
                resized_img)

            edges.append(edge)
            left_edges.append(edge_left)
            right_edges.append(edge_right)
            top_edges.append(edge_top)
            bottom_edges.append(edge_bottom)
            if edge * width / 100 > 3:
                break
            # If this is the first candidate and a collision-free region already exists, stop searching and just optimise this one, same idea as before, effectively a one-off pre-check
            if j == 1 and edge > 0:
                break
        batch_edges.append(edges)
        batch_edges_left.append(left_edges)
        batch_edges_right.append(right_edges)
        batch_edges_top.append(top_edges)
        batch_edges_bottom.append(bottom_edges)

        # Determine the optimal parameters for each image from these edges
        indexes = []
        # State list indicating the direction in which the position should be optimised
        directions = []
        for edges, left_edges, right_edges, top_edges, bottom_edges in zip(
                batch_edges, batch_edges_left, batch_edges_right,
                batch_edges_top, batch_edges_bottom):
            index = np.argmax(edges)
            if np.max(edges) == 0:
                edges_lr = (left_edges + right_edges)
                if max(edges_lr) > 0:
                    index = np.argmax(edges_lr)
                    if index >= len(edges):
                        index = index - len(edges)
            indexes.append(index)
            direction = 0
            # Decide the position-optimisation direction by comparing the edge widths on each side
            edge_range = max(left_edges[index], right_edges[index])
            if abs(left_edges[index] - right_edges[index]) > edge_range // 2:
                direction = 1 if left_edges[index] > right_edges[index] else 2
            edge_range = max(top_edges[index], bottom_edges[index])
            if abs(top_edges[index] - bottom_edges[index]) > edge_range // 2:
                direction = 3 if top_edges[index] > bottom_edges[index] else 4

            directions.append(direction)
    return indexes, batch_edges, batch_edges_left, batch_edges_right, batch_edges_top, batch_edges_bottom, directions
Beispiel #28
0
    def transform(self, image_a, image_b, mask, semantic_a, semantic_b):
        """Apply transformations to image and corresponding mask.
        Transformations applied are:
            random horizontal flipping, resizing, random cropping and normalizing
        Arguments:
            image_a {Image} -- Image
            image_b {Image} -- Image
            mask {Image} -- Mask
        
        Returns:
            image_a, image_b, mask {Image, Image, Image} -- transformed image_a, pair image_b and mask
        """
        # Random horizontal flipping
        if torch.rand(1) > 0.5:
            image_a = image_a.transpose(Image.FLIP_LEFT_RIGHT)
            image_b = image_b.transpose(Image.FLIP_LEFT_RIGHT)
            mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
            semantic_a = semantic_a.transpose(Image.FLIP_LEFT_RIGHT)
            semantic_b = semantic_b.transpose(Image.FLIP_LEFT_RIGHT)

        # print('debugging mask transform 2 size',mask.size)
        # Resize
        resize = transforms.Resize(size=self.new_size)
        image_a = resize(image_a)
        image_b = resize(image_b)
        # print('dim image after resize',image.size)

        # Resize mask
        mask = mask.resize((image_b.width, image_b.height), Image.NEAREST)
        semantic_a = semantic_a.resize((image_b.width, image_b.height),
                                       Image.NEAREST)
        semantic_b = semantic_b.resize((image_b.width, image_b.height),
                                       Image.NEAREST)

        # print('debugging mask transform 3 size',mask.size)
        # Random crop
        i, j, h, w = transforms.RandomCrop.get_params(image_b,
                                                      output_size=(self.height,
                                                                   self.width))
        image_a = F.crop(image_a, i, j, h, w)
        image_b = F.crop(image_b, i, j, h, w)

        mask = F.crop(mask, i, j, h, w)
        semantic_a = F.crop(semantic_a, i, j, h, w)
        semantic_b = F.crop(semantic_b, i, j, h, w)

        # print('debugging mask transform 4 size',mask.size)
        # Transform to tensor
        to_tensor = transforms.ToTensor()
        image_a = to_tensor(image_a)
        image_b = to_tensor(image_b)
        semantic_a = to_tensor(semantic_a) * 255  # to_tensor scales values to [0, 1]
        semantic_b = to_tensor(semantic_b) * 255
        semantic_a = mapping(semantic_a)
        semantic_b = mapping(semantic_b)

        if np.max(mask) == 1:
            mask = to_tensor(mask) * 255

        else:
            mask = to_tensor(mask)
        mask[mask > 0.5] = 1
        mask[mask < 0.5] = 0

        # print('debugging mask transform 5 size',mask.size)
        # Normalize
        normalizer = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

        image_a = normalizer(image_a)
        image_b = normalizer(image_b)
        #print(torch.unique(mask))
        #print(torch.unique(semantic_a))
        return image_a, image_b, mask, semantic_a, semantic_b
Beispiel #29
0
    def forward(
        self,
        image: Tensor,
        target: Optional[Dict[str, Tensor]] = None
    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if target is None:
            raise ValueError("The targets can't be None for this transform.")

        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError(
                    f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions."
                )
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        orig_w, orig_h = F.get_image_size(image)

        while True:
            # sample an option
            idx = int(torch.randint(low=0, high=len(self.options), size=(1, )))
            min_jaccard_overlap = self.options[idx]
            if min_jaccard_overlap >= 1.0:  # a value larger than 1 encodes the leave as-is option
                return image, target

            for _ in range(self.trials):
                # check the aspect ratio limitations
                r = self.min_scale + (self.max_scale -
                                      self.min_scale) * torch.rand(2)
                new_w = int(orig_w * r[0])
                new_h = int(orig_h * r[1])
                aspect_ratio = new_w / new_h
                if not (self.min_aspect_ratio <= aspect_ratio <=
                        self.max_aspect_ratio):
                    continue

                # check for 0 area crops
                r = torch.rand(2)
                left = int((orig_w - new_w) * r[0])
                top = int((orig_h - new_h) * r[1])
                right = left + new_w
                bottom = top + new_h
                if left == right or top == bottom:
                    continue

                # check for any valid boxes with centers within the crop area
                cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
                cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
                is_within_crop_area = (left < cx) & (cx < right) & (
                    top < cy) & (cy < bottom)
                if not is_within_crop_area.any():
                    continue

                # check at least 1 box with jaccard limitations
                boxes = target["boxes"][is_within_crop_area]
                ious = torchvision.ops.boxes.box_iou(
                    boxes,
                    torch.tensor([[left, top, right, bottom]],
                                 dtype=boxes.dtype,
                                 device=boxes.device))
                if ious.max() < min_jaccard_overlap:
                    continue

                # keep only valid boxes and perform cropping
                target["boxes"] = boxes
                target["labels"] = target["labels"][is_within_crop_area]
                target["boxes"][:, 0::2] -= left
                target["boxes"][:, 1::2] -= top
                target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
                target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
                image = F.crop(image, top, left, new_h, new_w)

                return image, target
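This mirrors an SSD-style random IoU crop over boxes and labels; a minimal usage sketch (the constructor and its option/scale/trial defaults are assumptions, only forward appears here):

import torch

# dummy CHW image with two boxes in (x1, y1, x2, y2) format
image = torch.rand(3, 300, 300)
target = {
    "boxes": torch.tensor([[30., 40., 120., 150.], [200., 60., 280., 220.]]),
    "labels": torch.tensor([1, 2]),
}
# crop = RandomIoUCrop()               # hypothetical constructor
# image, target = crop(image, target)  # boxes are shifted and clipped to the sampled window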
Beispiel #30
0
 def torchvision(self, img):
     return torchvision.crop(img, i=0, j=0, h=64, w=64)
Beispiel #31
0
 def torchvision_transform(self, img):
     return torchvision.crop(img, top=0, left=0, height=64, width=64)
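The last two wrappers reflect the rename of the functional crop arguments from i, j, h, w to top, left, height, width; a version-agnostic sketch using positional arguments:

import torchvision.transforms.functional as TF
from PIL import Image

img = Image.new("RGB", (128, 128))
patch = TF.crop(img, 0, 0, 64, 64)  # top, left, height, width (formerly i, j, h, w)
print(patch.size)  # (64, 64)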