Example #1
    def __getitem__(self, index):
        data = broden_dataset.resolve_record(self.list_sample[index])
        output = {}

        # image
        img = data['img']
        img = img[:, :, ::-1]  # BGR to RGB!!!
        ori_height, ori_width, _ = img.shape
        img_resized_list = []
        for this_short_size in self.imgSize:
            # calculate target height and width
            scale = min(this_short_size / float(min(ori_height, ori_width)),
                        self.imgMaxSize / float(max(ori_height, ori_width)))
            target_height = int(ori_height * scale)
            target_width = int(ori_width * scale)

            # to avoid rounding in network
            target_height = round2nearest_multiple(target_height,
                                                   self.padding_constant)
            target_width = round2nearest_multiple(target_width,
                                                  self.padding_constant)

            # resize
            img_resized = cv2.resize(img.copy(), (target_width, target_height))

            # image to float
            img_resized = img_resized.astype(np.float32)
            img_resized = img_resized.transpose((2, 0, 1))
            img_resized = self.img_transform(torch.from_numpy(img_resized))

            img_resized_list.append(img_resized)
        output['img_resized_list'] = [x.contiguous() for x in img_resized_list]
        output['original_img'] = img

        # object
        output['seg_object'] = torch.from_numpy(data["seg_obj"].astype(
            np.int32)).long().contiguous()
        output['valid_object'] = torch.tensor(int(data['valid_obj'])).long()

        # part
        output['seg_part'] = torch.from_numpy(
            np.sum(data["batch_seg_part"],
                   axis=0).astype(np.uint8)).long().contiguous()
        output['valid_part'] = torch.from_numpy(data['valid_part'].astype(
            np.uint8)).long()

        # scene
        output['scene_label'] = torch.tensor(int(data['scene_label']))

        # material
        output['seg_material'] = torch.from_numpy(
            data['seg_material']).contiguous()
        output['valid_material'] = torch.tensor(int(data['valid_mat'])).long()

        return output
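
The helper round2nearest_multiple is used by all three examples but defined elsewhere. A minimal sketch consistent with how the examples use it (rounding a size up to the nearest multiple of padding_constant, so that padded shapes divide evenly inside the network) could look like this; the actual implementation in the source repository may differ:

def round2nearest_multiple(x, p):
    # Round x up to the nearest multiple of p,
    # e.g. round2nearest_multiple(37, 8) == 40.
    return ((x - 1) // p + 1) * p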
Example #2
    def __getitem__(self, index):
        # NOTE: shuffle the sample list on first access; shuffling in __init__
        # has no effect here
        if not self.if_shuffled:
            np.random.shuffle(self.list_sample)
            self.if_shuffled = True

        # get sub-batch candidates
        batch_records = self._get_sub_batch()

        # resize all images' short edges to the chosen size
        if isinstance(self.imgSize, list):
            this_short_size = np.random.choice(self.imgSize)
        else:
            this_short_size = self.imgSize

        # calculate the BATCH's height and width
        # since we concatenate more than one sample, the batch's h and w must be
        # at least as large as those of each sample
        batch_resized_size = np.zeros((self.batch_per_gpu, 2), np.int32)
        for i in range(self.batch_per_gpu):
            img_height = batch_records[i]['height']
            img_width = batch_records[i]['width']
            this_scale = min(this_short_size / min(img_height, img_width),
                             self.imgMaxSize / max(img_height, img_width))
            img_resized_height, img_resized_width = img_height * this_scale, img_width * this_scale
            batch_resized_size[i, :] = img_resized_height, img_resized_width
        batch_resized_height = np.max(batch_resized_size[:, 0])
        batch_resized_width = np.max(batch_resized_size[:, 1])

        # pad both the input image and the segmentation maps to sizes h' and w'
        # such that p | h' and p | w'
        batch_resized_height = int(
            round2nearest_multiple(batch_resized_height,
                                   self.padding_constant))
        batch_resized_width = int(
            round2nearest_multiple(batch_resized_width, self.padding_constant))

        assert self.padding_constant >= self.segm_downsampling_rate, \
            'padding constant must be equal to or larger than the segm downsampling rate'

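        # allocate zero-padded batch tensors; label maps are stored at
        # 1/segm_downsampling_rate of the padded image resolution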
        batch_images = torch.zeros(
            (self.batch_per_gpu, 3, batch_resized_height, batch_resized_width))
        batch_objs = torch.zeros(
            (self.batch_per_gpu,
             batch_resized_height // self.segm_downsampling_rate,
             batch_resized_width // self.segm_downsampling_rate)).long()
        batch_valid_obj = torch.zeros(self.batch_per_gpu).long()
        batch_parts = torch.zeros(
            (self.batch_per_gpu, broden_dataset.nr_object_with_part,
             batch_resized_height // self.segm_downsampling_rate,
             batch_resized_width // self.segm_downsampling_rate)).long()
        batch_valid_parts = torch.zeros(
            (self.batch_per_gpu, broden_dataset.nr_object_with_part)).long()
        batch_scene_labels = torch.zeros(self.batch_per_gpu).long()
        batch_material = torch.zeros(
            (self.batch_per_gpu,
             batch_resized_height // self.segm_downsampling_rate,
             batch_resized_width // self.segm_downsampling_rate)).long()
        batch_valid_mat = torch.zeros(self.batch_per_gpu).long()

        for i in range(self.batch_per_gpu):

            data = broden_dataset.resolve_record(batch_records[i])

            img = data['img']
            seg_obj = data["seg_obj"]
            valid_obj = data["valid_obj"]
            seg_part = data["batch_seg_part"]
            valid_part = data["valid_part"]
            scene_label = data["scene_label"]
            seg_material = data["seg_material"]
            valid_mat = data["valid_mat"]

            # scene
            batch_scene_labels[i] = int(scene_label)

            # randomly flip image, object, part, and material maps together
            if self.random_flip:
                random_flip = np.random.choice([0, 1])
                if random_flip == 1:
                    img = cv2.flip(img, 1)
                    seg_obj = cv2.flip(seg_obj, 1)
                    seg_part = np.flip(seg_part, 2)
                    seg_material = cv2.flip(seg_material, 1)

            # img
            img = imresize(
                img, (batch_resized_size[i, 0], batch_resized_size[i, 1]),
                interp='bilinear')
            img = img.astype(np.float32)[:, :, ::-1]  # RGB to BGR!!!
            img = img.transpose((2, 0, 1))
            img = self.img_transform(torch.from_numpy(img.copy()))
            batch_images[i][:, :img.shape[1], :img.shape[2]] = img

            # object and part
            if valid_obj:
                batch_valid_obj[i] = valid_obj

                # object
                segm = uint16_imresize(
                    seg_obj,
                    (batch_resized_size[i, 0], batch_resized_size[i, 1]))
                segm_rounded_height = round2nearest_multiple(
                    segm.shape[0], self.padding_constant)
                segm_rounded_width = round2nearest_multiple(
                    segm.shape[1], self.padding_constant)
                segm_rounded = np.zeros(
                    (segm_rounded_height, segm_rounded_width), dtype='uint16')
                segm_rounded[:segm.shape[0], :segm.shape[1]] = segm
                segm = uint16_imresize(
                    segm_rounded,
                    (segm_rounded.shape[0] // self.segm_downsampling_rate,
                     segm_rounded.shape[1] // self.segm_downsampling_rate))
                batch_objs[i][:segm.shape[0], :segm.shape[1]] = \
                    torch.from_numpy(np.array(segm, dtype=np.int32))

                # part
                if np.sum(valid_part) == 0:
                    continue

                parts_resized = []
                for j in range(broden_dataset.nr_object_with_part):
                    parts_resized.append(
                        imresize(seg_part[j], (batch_resized_size[i, 0],
                                               batch_resized_size[i, 1]),
                                 interp='nearest'))
                for j in range(broden_dataset.nr_object_with_part):
                    if not valid_part[j]:
                        continue
                    part_rounded = np.zeros(
                        (segm_rounded_height, segm_rounded_width),
                        dtype='uint8')
                    ph, pw = parts_resized[j].shape
                    part_rounded[:ph, :pw] = parts_resized[j]
                    part = imresize(
                        part_rounded,
                        (part_rounded.shape[0] // self.segm_downsampling_rate,
                         part_rounded.shape[1] // self.segm_downsampling_rate),
                        interp='nearest')
                    batch_parts[i][j][:part.shape[0], :part.shape[1]] = \
                        torch.from_numpy(part.copy())
                    # NOTE: part seg might disappear after resize.
                    if len(np.unique(part)) > 1:
                        batch_valid_parts[i][j] = 1
            # material
            if valid_mat:
                batch_valid_mat[i] = valid_mat
                segm = imresize(
                    seg_material,
                    (batch_resized_size[i, 0], batch_resized_size[i, 1]),
                    interp='nearest')
                segm_rounded_height = round2nearest_multiple(
                    segm.shape[0], self.padding_constant)
                segm_rounded_width = round2nearest_multiple(
                    segm.shape[1], self.padding_constant)
                segm_rounded = np.zeros(
                    (segm_rounded_height, segm_rounded_width), dtype='uint8')
                segm_rounded[:segm.shape[0], :segm.shape[1]] = segm
                segm = imresize(
                    segm_rounded,
                    (segm_rounded.shape[0] // self.segm_downsampling_rate,
                     segm_rounded.shape[1] // self.segm_downsampling_rate),
                    interp='nearest')
                batch_material[i][:segm.shape[0], :segm.shape[1]] = \
                    torch.from_numpy(segm.copy())

        # use compressed part segm
        # TODO(LYC): remove compression
        batch_parts = torch.sum(batch_parts, dim=1)

        # assemble the batch output
        output = dict(
            img=batch_images,
            seg_object=batch_objs,
            valid_object=batch_valid_obj,
            seg_part=batch_parts,
            valid_part=batch_valid_parts,
            scene_label=batch_scene_labels,
            seg_material=batch_material,
            valid_material=batch_valid_mat,
            source_idx=torch.tensor(self.source_idx),
        )

        return output
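
Example #2 also relies on uint16_imresize, which is not shown here. Object label IDs can exceed 255, so an 8-bit resize would corrupt them. A plausible stand-in (an assumption, not necessarily the repository's helper) resizes the 16-bit label map with nearest-neighbor interpolation, which OpenCV supports for uint16 arrays directly:

import cv2
import numpy as np

def uint16_imresize(seg, size):
    # Nearest-neighbor resize of a uint16 label map; nearest-neighbor
    # never invents new label values. size is (height, width), while
    # cv2.resize expects dsize as (width, height).
    assert seg.dtype == np.uint16
    h, w = size
    return cv2.resize(seg, (int(w), int(h)), interpolation=cv2.INTER_NEAREST)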
Example #3
    def __getitem__(self, index):
        data = broden_dataset.resolve_record(self.list_sample[index])
        output = {"img_file_path": data["img_file_path"]}

        # image
        img = data['img']
        # reverse channel order; whether this is BGR->RGB or RGB->BGR depends
        # on how data['img'] was loaded
        img = img[:, :, ::-1]
        ori_height, ori_width, _ = img.shape

        # single-scale resize (replaces the multi-scale loop used in Example #1)
        # calculate target height and width
        scale = min(self.imgSize / float(min(ori_height, ori_width)),
                    self.imgMaxSize / float(max(ori_height, ori_width)))
        target_height = int(ori_height * scale)
        target_width = int(ori_width * scale)

        # to avoid rounding in network
        target_height = round2nearest_multiple(target_height,
                                               self.padding_constant)
        target_width = round2nearest_multiple(target_width,
                                              self.padding_constant)

        # resize
        img_resized = cv2.resize(img.copy(), (target_width, target_height))

        # image to float
        img_resized = img_resized.astype(np.float32)
        img_resized = img_resized.transpose((2, 0, 1))
        img_resized = self.img_transform(torch.from_numpy(img_resized))
        output['img'] = img_resized

        # dimensions of the downsampled segmentation maps
        segm_height = target_height // self.segm_downsampling_rate
        segm_width = target_width // self.segm_downsampling_rate

        # object
        seg_object_resized = \
            cv2.resize(data["seg_obj"], (segm_width, segm_height),
                       interpolation=cv2.INTER_NEAREST)
        output['seg_object'] = torch.from_numpy(
            seg_object_resized.astype(np.int32)).long().contiguous()
        output['valid_object'] = torch.tensor(int(data['valid_obj'])).long()

        # part
        seg_part = np.sum(data["batch_seg_part"], axis=0).astype(np.uint8)
        seg_part_resized = \
            cv2.resize(seg_part, (segm_width, segm_height),
                       interpolation=cv2.INTER_NEAREST)
        output['seg_part'] = torch.from_numpy(
            seg_part_resized.astype(np.int32)).long().contiguous()
        output['valid_part'] = \
            torch.from_numpy(data['valid_part'].astype(np.uint8)).long()

        # scene
        output['scene_label'] = torch.tensor(int(data['scene_label']))

        # material
        seg_material_resized = \
            cv2.resize(data['seg_material'], (segm_width, segm_height),
                       interpolation=cv2.INTER_NEAREST)
        output['seg_material'] = torch.from_numpy(
            seg_material_resized.astype(np.int32)).long().contiguous()
        output['valid_material'] = torch.tensor(int(data['valid_mat'])).long()

        return output
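
A minimal usage sketch for these datasets (the class name and constructor arguments below are assumptions for illustration; the examples above only show __getitem__):

from torch.utils.data import DataLoader

# hypothetical construction; the real constructor signature is not shown above
dataset = BrodenValDataset(records, opt)
loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

for batch in loader:
    img = batch['img']                # (1, 3, H, W) normalized float tensor
    seg_object = batch['seg_object']  # (1, H // rate, W // rate) long tensor
    scene_label = batch['scene_label']
    break

Note that Example #2 assembles its own per-GPU sub-batch inside __getitem__, so it is typically driven with a DataLoader batch size of 1 as well.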