def render_objects(self, min_obj_num=5):
        """Render a randomly composed scene and augment the resulting RGB image.

        A random subset of ``self.objs`` (target objects) and of
        ``self.dummy_objs`` (distractors) is drawn without replacement; every
        selected model gets a pose from ``self.pose_generator()`` and the whole
        set is passed to ``multi_object_renderer.render``.

        Args:
            min_obj_num: inclusive lower bound for the number of target
                objects (the upper bound ``len(self.objs)`` is exclusive).

        Returns:
            Tuple ``(rgb, depth, label, pos, rot)``: the augmented RGB image
            with its last axis reversed, the rendered depth and label images,
            and float32 arrays of shape ``(len(self.objs), 3)`` and
            ``(len(self.objs), 3, 3)`` holding the pose of each selected
            target (rows of unselected targets stay zero).
        """
        n_targets = np.random.randint(min_obj_num, len(self.objs))
        n_dummies = np.random.randint(2, len(self.dummy_objs))
        obj_ind = np.random.choice(
            np.arange(len(self.objs)), n_targets, replace=False)
        dummy_ind = np.random.choice(
            np.arange(len(self.dummy_objs)), n_dummies, replace=False)

        ret_pos = np.zeros((len(self.objs), 3))
        ret_rot = np.zeros((len(self.objs), 3, 3))
        translations, rotations, meshes = [], [], []

        # Targets first, then distractors: the renderer receives them in this
        # order and the label vector below relies on it.
        for target_idx in obj_ind:
            pos, rot = self.pose_generator()
            translations.append(pos.T)
            rotations.append(rot)
            meshes.append(self.models[target_idx])
            ret_pos[target_idx] = pos
            ret_rot[target_idx] = rot

        for clutter_idx in dummy_ind:
            pos, rot = self.pose_generator()
            translations.append(pos.T)
            rotations.append(rot)
            meshes.append(self.dummy_models[clutter_idx])

        # Targets are labelled with their 1-based index, distractors with -1.
        labels_idx = np.concatenate(
            (obj_ind + 1, np.full(len(dummy_ind), -1.0))).astype(np.int64)

        min_light = 0.8
        ambient = np.random.rand()
        # Each colour channel is drawn uniformly from [min_light, 1.0).
        light = np.random.rand(3) * (1.0 - min_light) + min_light
        # 0.1 and 4.0 are passed positionally; presumably the near/far clip
        # distances -- confirm against multi_object_renderer.render.
        ren_rgb, ren_depth, ren_label = multi_object_renderer.render(
            meshes, (self.img_width, self.img_height), self.K,
            rotations, translations, 0.1, 4.0,
            ambient_weight=ambient,
            light_color=light,
            labels=labels_idx, mode='rgb+depth+label')

        # RGB-only augmentation; every enabled step fires at random.
        if self.gaussian_noise and np.random.randint(0, 2):
            ren_rgb = preprocess_utils.add_noise(ren_rgb)
        if self.avaraging and np.random.randint(0, 2):
            ren_rgb = preprocess_utils.avaraging(ren_rgb)

        # 0: leave untouched, 1: gamma=1.5, 2: helper's default gamma.
        gamma_mode = np.random.randint(0, 3)
        if self.gamma_augmentation:
            if gamma_mode == 2:
                ren_rgb = preprocess_utils.gamma_augmentation(ren_rgb)
            elif gamma_mode == 1:
                ren_rgb = preprocess_utils.gamma_augmentation(ren_rgb, gamma=1.5)

        if self.salt_pepper_noise and np.random.randint(0, 2):
            ren_rgb = preprocess_utils.salt_pepper_augmentation(ren_rgb)

        # 0: leave untouched, 1: low contrast, 2: high contrast.
        contrast_mode = np.random.randint(0, 3)
        if self.contrast:
            if contrast_mode == 2:
                ren_rgb = self.contrast_server.high_contrast(ren_rgb)
            elif contrast_mode == 1:
                ren_rgb = self.contrast_server.low_contrast(ren_rgb)

        return (ren_rgb[:, :, ::-1], ren_depth, ren_label,
                ret_pos.astype(np.float32), ret_rot.astype(np.float32))
    def load_bg_data(self, idx):
        """Load background image ``idx``, randomly crop, resize and augment it.

        Args:
            idx: index into ``self.bg_fpaths``.

        Returns:
            The augmented background as an array resized to
            ``(self.img_width, self.img_height)``.

        Raises:
            IOError: if OpenCV cannot read the file (``cv2.imread`` returns
                ``None`` instead of raising, which previously surfaced as an
                unrelated ``AttributeError``).
        """
        bg_path = self.bg_fpaths[idx]
        bg = cv2.imread(bg_path)
        if bg is None:
            raise IOError(
                "failed to read background image: {}".format(bg_path))

        # Random crop: keep 50-100% of each side at a random offset.
        height, width = bg.shape[:2]
        resize_height = int((np.random.rand() * 0.5 + 0.5) * height)
        resize_width = int((np.random.rand() * 0.5 + 0.5) * width)
        crop_h = int(np.floor((height - resize_height) * np.random.rand()))
        crop_w = int(np.floor((width - resize_width) * np.random.rand()))

        bg = bg[crop_h:(crop_h + resize_height), crop_w:(crop_w + resize_width)]
        bg = cv2.resize(bg, (self.img_width, self.img_height))

        # Data augmentation; every enabled step fires at random.
        if self.gaussian_noise and np.random.randint(0, 2):
            bg = preprocess_utils.add_noise(bg)
        if self.avaraging and np.random.randint(0, 2):
            bg = preprocess_utils.avaraging(bg)

        # 0: leave untouched, 1: gamma=1.5, 2: helper's default gamma.
        rand_gamma = np.random.randint(0, 3)
        if self.gamma_augmentation and rand_gamma:
            if rand_gamma == 2:
                bg = preprocess_utils.gamma_augmentation(bg)
            else:
                bg = preprocess_utils.gamma_augmentation(bg, gamma=1.5)

        if self.salt_pepper_noise and np.random.randint(0, 2):
            bg = preprocess_utils.salt_pepper_augmentation(bg)

        # Backgrounds only ever get the low-contrast variant.
        if self.contrast and np.random.randint(0, 2):
            bg = self.contrast_server.low_contrast(bg)

        if self.bg_flip and np.random.randint(0, 2):
            bg = bg[:, ::-1, :]  # horizontal mirror
        if self.channel_swap:
            # Random permutation of the three colour channels.
            bg = bg[:, :, np.random.choice(np.arange(3), 3, replace=False)]

        return bg
# Example #3
# 0
    def get_example(self, i):
        """Return the ``i``-th sample as ``(img_rgb, ksizes, label)``.

        The class is ``self.class_indices[i // self.n_view]`` and the view is
        ``self.view_indices[i % self.n_view]``.  The raw data is cropped to a
        192x256 window (jittered when ``self.random`` is set), resized to
        ``self.img_size`` (``(width, height)``, as expected by ``cv2.resize``)
        and optionally mirrored / randomly rescaled.

        Returns:
            img_rgb: float32, ``(3, img_size[1], img_size[0])``, in [0, 1].
            ksizes: float32, ``(1, img_size[1], img_size[0])`` -- the output
                of ``preprocess_utils.roi_kernel_size`` on the inpainted
                range image, scaled along with the random resize.
            label: int32 mask times class index, resized by ``self.out_size``.
        """
        img_size = self.img_size
        c_i = self.class_indices[i // self.n_view]
        v_i = self.view_indices[i % self.n_view]
        img_rgb, mask, pc = self.load_orig_data(c_i, v_i)

        if self.random:
            img_rgb = preprocess_utils.add_noise(img_rgb)
            rand_h = random.randint(0, 40)
            rand_w = random.randint(0, 40)
            rows = slice(120 + rand_h, 120 + 192 + rand_h)
            cols = slice(140 + rand_w, 140 + 256 + rand_w)
        else:
            rows = slice(140, 332)
            cols = slice(160, 416)
        img_rgb = img_rgb[rows, cols]
        mask = mask[rows, cols]
        img_depth = pc[rows, cols]

        img_rgb = img_rgb / 255.0  # Scale to [0, 1]
        img_rgb = cv2.resize(img_rgb, img_size)

        # Range image = Euclidean norm of each point; holes are filled with
        # a depth-only inpainting (RGB is not used).
        img_depth = np.sqrt(np.square(img_depth).sum(axis=2))
        img_depth = preprocess_utils.depth_inpainting(img_depth)

        ksizes = preprocess_utils.roi_kernel_size(img_depth)
        ksizes = cv2.resize(ksizes, img_size)
        # The depth map itself is not part of the output, so the former
        # normalise/reshape/resize steps on it (dead work that also forced
        # img_size to match the crop) are gone.

        # Single-channel mask in [0, 1], brought to img_size so that the
        # crop offsets used below address the same pixels as in img_rgb.
        # (cv2.resize is a plain copy when the size already matches.)
        mask = mask.transpose(2, 0, 1)[0] / 255.0
        mask = cv2.resize(mask, img_size)

        # Random horizontal flip.
        if self.random_flip:
            if random.randint(0, 1):
                img_rgb = img_rgb[:, ::-1, :]
                ksizes = ksizes[:, ::-1]
                mask = mask[:, ::-1]

        # Random resizing.
        if self.random_resize:
            if self.force_resize or random.randint(0, 1):
                resize_ratio = random.uniform(0.5, 1.5)
                resized_imsize = (int(img_size[0] * resize_ratio),
                                  int(img_size[1] * resize_ratio))

                if resize_ratio < 1.0:
                    # NOTE(review): the override happens after
                    # resized_imsize was computed, so it only changes the
                    # ksizes scale below, not the crop -- confirm intent.
                    if not self.resize_train:
                        resize_ratio = 0.75
                    clop_h = random.randint(0, img_size[1] - resized_imsize[1])
                    clop_w = random.randint(0, img_size[0] - resized_imsize[0])
                    win_r = slice(clop_h, clop_h + resized_imsize[1])
                    win_c = slice(clop_w, clop_w + resized_imsize[0])

                    # Crop a smaller window and stretch it back to img_size.
                    img_rgb = cv2.resize(img_rgb[win_r, win_c, :], img_size)
                    ksizes = cv2.resize(ksizes[win_r, win_c], img_size)
                    mask = cv2.resize(mask[win_r, win_c], img_size)

                elif resize_ratio > 1.0:
                    # This branch used to be wrapped in a second
                    # `if resize_ratio < 1.0`, which can never hold here, so
                    # the image was never enlarged although ksizes was still
                    # scaled.  The stray condition is removed.
                    if not self.resize_train:
                        resize_ratio = 1.25
                    clop_h = random.randint(0, resized_imsize[1] - img_size[1])
                    clop_w = random.randint(0, resized_imsize[0] - img_size[0])
                    win_r = slice(clop_h, clop_h + img_size[1])
                    win_c = slice(clop_w, clop_w + img_size[0])

                    # Enlarge, then crop an img_size window.
                    img_rgb = cv2.resize(img_rgb, resized_imsize)[win_r, win_c, :]
                    ksizes = cv2.resize(ksizes, resized_imsize)[win_r, win_c]
                    mask = cv2.resize(mask, resized_imsize)[win_r, win_c]

                ksizes = ksizes * resize_ratio

        # Label mask at the (down)scaled output resolution; cv2.resize takes
        # (width, height).
        label_w = int(img_size[0] * self.out_size)
        label_h = int(img_size[1] * self.out_size)
        mask = cv2.resize(mask, (label_w, label_h))
        label = mask * c_i

        img_rgb = img_rgb.transpose(2, 0, 1).astype(np.float32)
        # ksizes is a (height, width) array; reshaping it to
        # (1, width, height) as before scrambled rows for non-square sizes.
        ksizes = ksizes.reshape(1, img_size[1], img_size[0])

        return img_rgb, ksizes.astype(np.float32), label.astype(np.int32)
# Example #4
# 0
    def get_example(self, i):
        """Return the ``i``-th sample with per-pixel pose regression targets.

        The class is ``self.class_indices[i // self.n_view]`` and the view is
        ``self.view_indices[i % self.n_view]``.  A 192x256 window (jittered
        when ``self.random`` is set) is cut from the raw data and every map
        is resized to ``self.img_size`` (``(width, height)``).

        Returns:
            Tuple ``(img_rgb, img_depth, label, dist_map, pos, rot_param,
            rot_map, pc)``: float32 CHW colour image in [0, 1]; float32
            ``(1, H, W)`` range image mapped from 0.5..2.5 to 0..1 and
            clamped; int32 mask times class index; float32 ``pos - pc`` with
            NaNs zeroed; the unmodified ``pos``; the output of
            ``preprocess_utils.rpy_param(rot)``; that vector broadcast over
            the mask (float32, 5 channels); and the float32 CHW point map.
        """
        out_wh = self.img_size
        c_i = self.class_indices[i // self.n_view]
        v_i = self.view_indices[i % self.n_view]
        img_rgb, mask, pos, rot, pc = self.load_orig_data(c_i, v_i)

        # Pick the crop window: jittered for training, fixed otherwise.
        if self.random:
            img_rgb = preprocess_utils.add_noise(img_rgb)
            dy = random.randint(0, 40)
            dx = random.randint(0, 40)
            rows = slice(120 + dy, 120 + 192 + dy)
            cols = slice(140 + dx, 140 + 256 + dx)
        else:
            rows = slice(140, 332)
            cols = slice(160, 416)
        img_rgb = img_rgb[rows, cols]
        mask = mask[rows, cols]
        pc = pc[rows, cols]

        # Colour image: [0, 1], target size, channels first.
        rgb = cv2.resize(img_rgb / 255.0, out_wh)
        rgb = rgb.transpose(2, 0, 1).astype(np.float32)

        # Range image: norm of each point, holes filled by depth-only
        # inpainting (RGB is not used).
        depth = np.sqrt(np.square(pc).sum(axis=2))
        depth = preprocess_utils.depth_inpainting(depth)

        # Only the 0.5 .. 2.5 [m] range is kept, mapped onto 0 .. 1.
        depth = (depth - 0.5) / 2.0
        depth[depth > 1.0] = 1.0
        depth[depth < 0.0] = 0.0
        depth = cv2.resize(depth, out_wh)
        depth = depth.reshape(1, out_wh[1], out_wh[0]).astype(np.float32)

        # Single-channel mask in [0, 1] and the class-index label from it.
        mask = cv2.resize(mask.transpose(2, 0, 1)[0] / 255.0, out_wh)
        label = (mask * c_i).astype(np.int32)

        pc = cv2.resize(pc, out_wh).transpose(2, 0, 1)
        rot_param = preprocess_utils.rpy_param(rot)

        # Five copies of the mask, one per rotation parameter.
        stacked_mask = np.repeat(mask[np.newaxis], 5, axis=0)
        rot_map = stacked_mask * rot_param[:, np.newaxis, np.newaxis]

        # Offset from every point to the object position; NaN points -> 0.
        dist_map = pos[:, np.newaxis, np.newaxis] - pc
        dist_map[np.isnan(dist_map)] = 0

        return (rgb, depth, label, dist_map.astype(np.float32), pos,
                rot_param, rot_map.astype(np.float32), pc.astype(np.float32))
# Example #5
# 0
    def get_example(self, i):
        """Return the ``i``-th sample with per-pixel centre-offset targets.

        The class is ``self.class_indices[i // self.n_view]`` and the view is
        ``self.view_indices[i % self.n_view]``.  A 192x256 window (jittered
        when ``self.random`` is set) is cut from the raw data and every map
        is resized to ``self.img_size`` (``(width, height)``).  All maps stay
        in (H, W, C) layout until augmentation is finished so that the flip
        and the random resize address the same pixels in each of them.

        Returns:
            Tuple ``(img_rgb, label, img_cp, img_ocp, pos_arr, rot_arr, pc,
            mask, nonnan_mask)``:
            float32 CHW colour image in [0, 1]; int32 mask times class index;
            float32 ``(pos - pc) * mask`` with NaNs zeroed; the same offsets
            negated and multiplied by ``inv(rot)``; ``(n_class, 3)`` and
            ``(n_class, 3, 3)`` arrays holding ``pos`` / ``rot`` at row
            ``c_i - 1``; the CHW point map; the int32 mask; and a float32
            map that is 1 where the first point-map channel is not NaN.
        """
        img_size = self.img_size
        c_i = self.class_indices[i // self.n_view]
        v_i = self.view_indices[i % self.n_view]
        img_rgb, mask, pc, pos, rot = self.load_orig_data(c_i, v_i)

        if self.random:
            img_rgb = preprocess_utils.add_noise(img_rgb)
            rand_h = random.randint(0, 40)
            rand_w = random.randint(0, 40)
            rows = slice(120 + rand_h, 120 + 192 + rand_h)
            cols = slice(140 + rand_w, 140 + 256 + rand_w)
        else:
            rows = slice(140, 332)
            cols = slice(160, 416)
        img_rgb = img_rgb[rows, cols]
        mask = mask[rows, cols]
        pc = pc[rows, cols]

        if self.random_flip:
            rand_flip = random.randint(0, 1)
        else:
            rand_flip = False

        img_rgb = img_rgb / 255.0  # Scale to [0, 1]
        img_rgb = cv2.resize(img_rgb, img_size)

        mask = mask.transpose(2, 0, 1)[0] / 255.0  # 1 channel, [0, 1]
        mask = cv2.resize(mask, img_size)

        # Bring the point map to img_size before any augmentation so that
        # the crop offsets below are valid for it as well.
        pc = cv2.resize(pc, img_size)

        # Random horizontal flip.  The old code evaluated `pc[:,:,::-1]`
        # without assigning it, negated image row 0 instead of the x
        # channel, left the returned mask unmirrored and changed the arrays
        # handed out by load_orig_data in place.
        if rand_flip:
            img_rgb = img_rgb[:, ::-1, :]
            mask = mask[:, ::-1]
            pc = pc[:, ::-1, :].copy()
            pc[:, :, 0] *= -1.0  # mirrored image -> x changes sign
            pos = pos.copy()
            pos[0] *= -1.0
            # NOTE(review): `rot` is not mirrored, exactly as before --
            # confirm whether the rotation targets need the reflection too.

        # Random resizing.  This branch used to index img_rgb as (H, W, C)
        # after it had already been transposed to (C, H, W), and accessed
        # channel 3 of the point map although only channels 0..2 exist
        # (pos has three entries and img_cp is reshaped to (3, -1) below).
        if self.random_resize:
            resize_ratio = random.uniform(0.5, 1.5)
            resized_imsize = (int(img_size[0] * resize_ratio),
                              int(img_size[1] * resize_ratio))

            if resize_ratio < 1.0:
                clop_h = random.randint(0, img_size[1] - resized_imsize[1])
                clop_w = random.randint(0, img_size[0] - resized_imsize[0])
                win_r = slice(clop_h, clop_h + resized_imsize[1])
                win_c = slice(clop_w, clop_w + resized_imsize[0])

                # Crop a smaller window and stretch it back to img_size.
                img_rgb = cv2.resize(img_rgb[win_r, win_c, :], img_size)
                pc = cv2.resize(pc[win_r, win_c, :], img_size)
                mask = cv2.resize(mask[win_r, win_c], img_size)
                # NOTE(review): both branches enlarge the scene content, yet
                # z is multiplied by 1 / resize_ratio in both; here that
                # increases the depth.  `pos` is not rescaled either.
                # Confirm the intended direction.
                pc[:, :, 2] *= 1.0 / resize_ratio

            elif resize_ratio > 1.0:
                clop_h = random.randint(0, resized_imsize[1] - img_size[1])
                clop_w = random.randint(0, resized_imsize[0] - img_size[0])
                win_r = slice(clop_h, clop_h + img_size[1])
                win_c = slice(clop_w, clop_w + img_size[0])

                # Enlarge, then crop an img_size window.
                img_rgb = cv2.resize(img_rgb, resized_imsize)[win_r, win_c, :]
                pc = cv2.resize(pc, resized_imsize)[win_r, win_c, :].copy()
                mask = cv2.resize(mask, resized_imsize)[win_r, win_c]
                pc[:, :, 2] *= 1.0 / resize_ratio

        # Derived from the final mask so label, mask and image stay aligned.
        label = mask * c_i

        inv_rot = np.linalg.inv(rot)

        # Offset from every point to the object centre (camera frame);
        # NaN points contribute zero.
        pc = pc.transpose(2, 0, 1)
        img_cp = pos[:, np.newaxis, np.newaxis] - pc
        img_cp[np.isnan(img_cp)] = 0

        # The same offsets, negated and multiplied by inv(rot).
        img_ocp = np.dot(inv_rot, -img_cp.reshape(3, -1)).reshape(img_cp.shape)

        img_cp = (img_cp * mask).astype(np.float32)
        img_ocp = (img_ocp * mask).astype(np.float32)

        # 1 where the first point-map channel holds a valid value.
        nonnan_mask = np.invert(np.isnan(pc[0])).astype(np.float32)

        # Per-class pose tables; only the row of this sample's class is set.
        pos_arr = np.zeros((self.n_class, 3))
        pos_arr[c_i - 1] = pos

        rot_arr = np.zeros((self.n_class, 3, 3))
        rot_arr[c_i - 1] = rot

        img_rgb = img_rgb.transpose(2, 0, 1).astype(np.float32)

        return (img_rgb, label.astype(np.int32), img_cp, img_ocp, pos_arr,
                rot_arr, pc, mask.astype(np.int32), nonnan_mask)