Ejemplo n.º 1
0
    def transform_d(self, image, mask):
        """Jointly augment an (image, mask) pair for training.

        Applies the same random crop and random flips to both inputs, then
        downscales only the image by ``self.upscale_factor`` — the mask stays
        at the full ``self.crop_size`` resolution, so the pair forms an
        (LR input, HR target) example.

        Args:
            image: PIL image.
            mask: PIL image spatially aligned with ``image``.

        Returns:
            Tuple ``(image, mask)`` of tensors (via ``TF.to_tensor``).
        """
        # Random crop — one window sampled from the image, applied to both
        # so they stay aligned.
        i, j, h, w = RandomCrop.get_params(image,
                                           output_size=(self.crop_size,
                                                        self.crop_size))
        image = TF.crop(image, i, j, h, w)
        mask = TF.crop(mask, i, j, h, w)

        # Downscale only the image; the mask keeps the high-resolution crop.
        # (The original had a dead no-op `image = image` here — removed.)
        image = TF.resize(image, (self.crop_size // self.upscale_factor,
                                  self.crop_size // self.upscale_factor))

        # Random horizontal flipping (applied jointly)
        if random.random() > 0.5:
            image = TF.hflip(image)
            mask = TF.hflip(mask)

        # Random vertical flipping (applied jointly)
        if random.random() > 0.5:
            image = TF.vflip(image)
            mask = TF.vflip(mask)

        # Transform to tensor
        image = TF.to_tensor(image)
        mask = TF.to_tensor(mask)
        return image, mask
Ejemplo n.º 2
0
    def __getitem__(self, idx):
        """Return a dict of aligned, randomly-cropped color/depth tensors."""
        triple = self.file_triples[idx]
        color_file = os.path.join(self.data_dir, 'color', triple[0])
        depth_file = os.path.join(self.data_dir, 'height', triple[2])

        cached = self.cache_triples[idx]
        if cached is not None:
            color_image, depth_image = cached
        else:
            # First access: load both images, grayscale the depth map,
            # pre-resize to twice the target size, and memoize the result
            # so later epochs skip the disk round-trip.
            color_image = Image.open(color_file)
            depth_image = TF.to_grayscale(Image.open(depth_file))
            color_image = TF.resize(color_image, self.size * 2)
            depth_image = TF.resize(depth_image, self.size * 2)
            self.cache_triples[idx] = (color_image, depth_image)

        # One crop window, applied to both modalities so they stay aligned.
        crop_paras = RandomCrop.get_params(color_image,
                                           output_size=(self.size, self.size))
        color_image = TF.crop(color_image, *crop_paras)
        depth_image = TF.crop(depth_image, *crop_paras)

        return {
            'color': TF.to_tensor(color_image).to(self.device),
            'depth': TF.to_tensor(depth_image).to(self.device),
        }
Ejemplo n.º 3
0
    def cache_func(self, i):
        """Build and store the i-th chunk of LR/HR crops.

        Custom hook for HDF5Cache: loads DIV2K image pairs for this chunk,
        takes ``self.mult`` random aligned crops per pair, and hands the
        stacked float32 arrays to the cache.
        """
        lr_images = []
        hr_images = []
        chunk_len = self.cache.cache_size // self.mult
        offset = i * chunk_len
        to_tensor = ToTensor()
        for idx in range(chunk_len):
            img_num = offset + idx + 1
            if img_num > 800:
                # Wrap past the end of the 800-image DIV2K training split.
                img_num -= self.cache.cache_size
            img_hr_name = ("./datasets/saved/DIV2K_train_HR/" +
                           str(img_num).zfill(4) + ".png")
            img_lr_name = (
                f"./datasets/saved/DIV2K_train_LR_bicubic/X{self.factor}/" +
                str(img_num).zfill(4) + f"x{self.factor}.png")
            # C,H,W
            img_hr = Image.open(img_hr_name)
            img_lr = Image.open(img_lr_name)

            hr_size = self.size * self.factor
            f = self.factor
            for _ in range(self.mult):
                # Sample an HR window; the LR window is the same region
                # scaled down by the upscaling factor.
                top, left, height, width = RandomCrop.get_params(
                    img_hr, (hr_size, hr_size))
                hr_crop = TF.crop(img_hr, top, left, height, width)
                lr_crop = TF.crop(img_lr, top // f, left // f,
                                  height // f, width // f)
                lr_images.append(to_tensor(lr_crop))
                hr_images.append(to_tensor(hr_crop))

        lr_type = np.stack(lr_images).astype(np.float32)
        hr_type = np.stack(hr_images).astype(np.float32)
        self.cache.cache_images(i, lr_type, hr_type)
Ejemplo n.º 4
0
def random_resize_crop_synth(augment_targets, size):
    """Crop one shared random square (side = image's shorter edge) from every
    target and resize it to ``size`` x ``size``.

    The image and score maps are interpolated bicubically; the confidence
    mask uses nearest-neighbour so its values are not blended.
    """
    image, region_score, affinity_score, confidence_mask = augment_targets

    image = Image.fromarray(image)
    region_score = Image.fromarray(region_score)
    affinity_score = Image.fromarray(affinity_score)
    confidence_mask = Image.fromarray(confidence_mask)

    # Largest square that always fits: side equals the shorter edge.
    short_side = min(image.size)
    i, j, h, w = RandomCrop.get_params(image,
                                       output_size=(short_side, short_side))

    image = resized_crop(image, i, j, h, w, (size, size),
                         interpolation=InterpolationMode.BICUBIC)
    region_score = resized_crop(region_score, i, j, h, w, (size, size),
                                interpolation=InterpolationMode.BICUBIC)
    affinity_score = resized_crop(affinity_score, i, j, h, w, (size, size),
                                  interpolation=InterpolationMode.BICUBIC)
    # Nearest-neighbour keeps mask values categorical.
    confidence_mask = resized_crop(confidence_mask, i, j, h, w, (size, size),
                                   interpolation=InterpolationMode.NEAREST)

    return [
        np.array(image),
        np.array(region_score),
        np.array(affinity_score),
        np.array(confidence_mask),
    ]
Ejemplo n.º 5
0
    def __getitem__(self, index):
        """Load an (edge, image) pair and apply joint augmentation.

        Returns:
            Tuple of (normalized image tensor, inverted edge tensor,
            deteriorated edge tensor).
        """
        edge = pil_loader(
            os.path.join(os.getcwd(), self.datapath_lines, self.lines[index]))
        img = pil_loader(
            os.path.join(os.getcwd(), self.datapath_imgs, self.imgs[index]))

        # Joint random scaling/rotation: scale in [0.5, 1.5),
        # angle in [0, 0.25) degrees. Same parameters for both inputs.
        scale = 0.5 + random.random()
        angle = 0.25 * random.random()
        edge = TF.affine(edge, angle=angle, scale=scale,
                         translate=(0, 0), shear=0)
        img = TF.affine(img, angle=angle, scale=scale,
                        translate=(0, 0), shear=0)

        # Joint horizontal flip, 50% of the time.
        if random.random() > 0.5:
            img, edge = TF.hflip(img), TF.hflip(edge)

        # Joint random 384x384 crop.
        i, j, h, w = RandomCrop.get_params(edge, output_size=(384, 384))
        edge = TF.crop(edge, i, j, h, w)
        img = TF.crop(img, i, j, h, w)

        # Photometric jitter on the image only (edges stay binary).
        t_jitter = ColorJitter(brightness=0.5, contrast=0.5,
                               saturation=0.5, hue=0.5)
        img = t_jitter(img)

        # Deterioration: 90% of the time derive a degraded edge map;
        # otherwise reuse the clean edge. Edges are inverted because
        # BIPED images are white on black.
        if random.random() > 0.1:
            edge_d = TF.to_tensor(CustomDataset.deteriorate(edge))
            edge = 1 - TF.to_tensor(edge)
        else:
            edge = 1 - TF.to_tensor(edge)
            edge_d = edge.detach()

        img = TF.to_tensor(img)
        return self.normalize(img), edge, edge_d
Ejemplo n.º 6
0
 def train_transform(self, hr, lr, crop_size, upscale_factor):
     """Produce an aligned (LR, HR) training pair via one shared random crop.

     The HR image is cropped to ``crop_size``. The LR image is upsampled to
     HR resolution so the same crop window lines up, cropped, then shrunk
     back to its native LR size (``crop_size / upscale_factor``).
     """
     hr_width, hr_height = hr.size
     lr_side = int(crop_size / upscale_factor)
     top, left, height, width = RandomCrop.get_params(
         hr, output_size=(crop_size, crop_size))
     hr_train = TF.crop(hr, top, left, height, width)
     if upscale_factor == 1:
         # No scale gap: crop the LR image directly at the same window.
         lr_train = TF.crop(lr, top, left, height, width)
     else:
         # Upsample LR to HR size, crop the matching window, then scale
         # the crop back down to LR resolution.
         upsampled = lr.resize((hr_width, hr_height), Image.BICUBIC)
         lr_train = TF.crop(upsampled, top, left, height, width)
         lr_train = lr_train.resize((lr_side, lr_side), Image.BICUBIC)
     hr_train = TF.to_tensor(hr_train)
     lr_train = TF.to_tensor(lr_train)
     return lr_train, hr_train
Ejemplo n.º 7
0
def random_crop(augment_targets, size):
    """Apply one shared random square crop of side ``size`` to all targets.

    Returns the cropped targets as numpy arrays, in the same order:
    [image, region_score, affinity_score, confidence_mask].
    """
    pil_targets = [Image.fromarray(target) for target in augment_targets]

    # Sample the window once so every target is cropped identically.
    i, j, h, w = RandomCrop.get_params(pil_targets[0],
                                       output_size=(size, size))

    return [np.array(crop(target, i, j, h, w)) for target in pil_targets]
Ejemplo n.º 8
0
    def __getitem__(self, index):
        """Return one audio-visual training sample.

        Returns:
            ``((audio_data, video_data), target, target_audio)`` where
            ``video_data`` is the concatenation of the transformed frames.
        """
        #Get meta-data
        video_id, entity_id = self.entity_list[index]
        entity_metadata = self.entity_data[video_id][entity_id]

        audio_offset = float(entity_metadata[0][1])
        # Pick a random anchor frame within the entity's track.
        mid_index = random.randint(0, len(entity_metadata) - 1)
        midone = entity_metadata[mid_index]
        target = int(midone[-1])
        target_audio = self.speech_data[video_id][midone[1]]

        clip_meta_data = cu.generate_clip_meta(entity_metadata, mid_index,
                                               self.half_clip_length)
        video_data, audio_data = io.load_av_clip_from_metadata(
            clip_meta_data, self.video_root, self.audio_root, audio_offset,
            self.target_size)

        if self.do_video_augment:
            # random flip (same decision for every frame in the clip)
            if bool(random.getrandbits(1)):
                video_data = [
                    s.transpose(Image.FLIP_LEFT_RIGHT) for s in video_data
                ]

            # Random crop to a fraction f of the frame size.
            # RandomCrop.get_params returns (top, left, height, width), but
            # PIL's Image.crop expects a (left, upper, right, lower) box.
            # BUG FIX: the previous box=(j, i, w, h) passed the crop's
            # width/height as absolute right/lower coordinates, yielding a
            # wrong region whenever the window did not start at the origin.
            width, height = video_data[0].size
            f = random.uniform(0.5, 1)
            i, j, h, w = RandomCrop.get_params(video_data[0],
                                               output_size=(int(height * f),
                                                            int(width * f)))
            video_data = [s.crop(box=(j, i, j + w, i + h)) for s in video_data]

        if self.video_transform is not None:
            video_data = [self.video_transform(vd) for vd in video_data]

        video_data = torch.cat(video_data, dim=0)
        return (np.float32(audio_data), video_data), target, target_audio
Ejemplo n.º 9
0
 def crop_imgs(*pil_imgs, size):
     """Yield every image cropped to the same random window.

     Crop parameters are sampled once from the first image so that all
     outputs stay spatially aligned.
     """
     top, left, height, width = RandomCrop.get_params(pil_imgs[0], size)
     for image in pil_imgs:
         yield F.crop(image, top, left, height, width)