Esempio n. 1
0
class simulator(object):
    """Batch of canvases that are filled in one predicted step at a time.

    Every scene in the batch keeps its latest frame, mask and label map
    together with the history of predicted indices, vectors and retrieved
    patches.  The frame has ``3 * output_vocab_size`` channels when
    ``cfg.use_color_volume`` is set and ``4 + output_vocab_size`` channels
    otherwise.
    """

    def __init__(self, db, batch_size=None, nn_table=None):
        """Store the database/config and obtain the patch lookup tables.

        Args:
            db: database object; its ``cfg`` attribute is used as the config.
            batch_size: number of scenes; defaults to ``cfg.batch_size``.
            nn_table: pre-built lookup tables.  When omitted, a new
                ``AllCategoriesTables`` is created from ``db`` and built.
        """
        self.db = db
        self.cfg = db.cfg
        if batch_size is None:
            batch_size = self.cfg.batch_size
        self.batch_size = batch_size
        if nn_table is not None:
            self.nn_table = nn_table
        else:
            self.nn_table = AllCategoriesTables(db)
            self.nn_table.build_nntables_for_all_categories()

    def _channel_dim(self):
        """Return the number of channels of a frame for the current config."""
        if self.cfg.use_color_volume:
            return 3 * self.cfg.output_vocab_size
        return 4 + self.cfg.output_vocab_size

    def _color_channels(self, cls_ind):
        """Return the channel slice that receives the colors of ``cls_ind``."""
        if self.cfg.use_color_volume:
            # One 3-channel group per class.
            return slice(3 * cls_ind, 3 * (cls_ind + 1))
        # A single 3-channel group stored in the last channels.
        return slice(-3, None)

    def reset(self):
        """Start ``batch_size`` empty scenes and return their zero frames.

        Returns:
            A torch tensor stacking the initial frame of every scene along
            a new leading axis.
        """
        self.scenes = []
        depth = self._channel_dim()
        img_w = self.cfg.input_image_size[0]
        img_h = self.cfg.input_image_size[1]
        for _ in range(self.batch_size):
            self.scenes.append({
                'out_inds': [],
                'out_vecs': [],
                'out_patches': [],
                'last_frame': np.zeros((img_h, img_w, depth)),
                'last_label': np.zeros((img_h, img_w), dtype=np.int32),
                'last_mask': np.zeros((img_h, img_w), dtype=np.float32),
            })
        initial = np.stack([entry['last_frame'] for entry in self.scenes], axis=0)
        return torch.from_numpy(initial)

    def batch_render_to_pytorch(self, out_inds, out_vecs):
        """Apply one predicted step to every scene and return the frames.

        Args:
            out_inds: per-scene predicted indices, one entry per scene.
            out_vecs: per-scene predicted vectors, one entry per scene.

        Returns:
            A torch tensor stacking the updated frame of every scene.
        """
        assert len(out_inds) == self.batch_size
        rendered = [
            self.update_scene(
                self.scenes[k],
                {'out_inds': out_inds[k], 'out_vec': out_vecs[k]})
            for k in range(self.batch_size)
        ]
        return torch.from_numpy(np.stack(rendered, 0))

    def batch_redraw(self, return_sequence=False):
        """Re-render every scene from its recorded prediction history.

        Args:
            return_sequence: when true, keep the per-step sequence returned
                by ``render_predictions_as_output``; otherwise only the final
                result is kept and a leading axis of size one is added.

        Returns:
            Five lists with one entry per scene: frames, noises, masks,
            labels and the scene dicts produced by the database.
        """
        out_frames, out_noises, out_masks, out_labels, out_scenes = [], [], [], [], []
        sinks = (out_frames, out_noises, out_masks, out_labels)
        for state in self.scenes:
            predicted = self.db.prediction_outputs_to_scene(state, self.nn_table)
            predicted['patches'] = state['out_patches']
            rendered = self.render_predictions_as_output(predicted, return_sequence)
            if not return_sequence:
                rendered = tuple(arr[None, ...] for arr in rendered)
            for sink, arr in zip(sinks, rendered):
                sink.append(arr)
            out_scenes.append(predicted)
        return out_frames, out_noises, out_masks, out_labels, out_scenes

    def render_predictions_as_output(self, scene, return_sequence):
        """Compose all patches of ``scene`` onto fresh canvases.

        Args:
            scene: dict with ``width``, ``height``, ``clses``, ``boxes`` and
                ``patches`` entries.
            return_sequence: when true, return the state after every step
                stacked along a new leading axis; otherwise only the final
                state.

        Returns:
            A ``(frames, noises, masks, labels)`` tuple.  A scene without
            any class yields the untouched zero canvases.
        """
        width = scene['width']
        height = scene['height']
        clses = scene['clses']
        boxes = scene['boxes']
        patches = scene['patches']

        depth = self._channel_dim()
        frame = np.zeros((height, width, depth))
        noise = np.zeros((height, width, depth))
        label = np.zeros((height, width), dtype=np.int32)
        mask = np.zeros((height, width), dtype=np.float32)

        # One (frame, noise, mask, label) snapshot per composed patch.
        snapshots = []
        for step in range(len(clses)):
            cls_ind = clses[step]
            xywh = boxes[step]
            patch = patches[step]
            xyxy = xywh_to_xyxy(xywh, width, height)
            chans = self._color_channels(cls_ind)
            frame[:, :, chans], mask, _, label, noise[:, :, chans] = \
                patch_compose_and_erose(frame[:, :, chans], mask, label,
                                        xyxy, patch, self.db, noise[:, :, chans])
            if not self.cfg.use_color_volume:
                # Channel -4 and the class channel accumulate the mask.
                frame[:, :, -4] = np.maximum(mask * 255, frame[:, :, -4])
                frame[:, :, cls_ind] = np.maximum(mask * 255, frame[:, :, cls_ind])
            snapshots.append((frame.copy(), noise.copy(), mask.copy(), label.copy()))

        if not snapshots:
            snapshots.append((frame.copy(), noise.copy(), mask.copy(), label.copy()))

        if return_sequence:
            return tuple(np.stack(series, 0) for series in zip(*snapshots))
        return snapshots[-1]

    def update_scene(self, scene, step_prediction):
        """Record one predicted step in ``scene`` and refresh its canvases.

        Args:
            scene: scene dict created by ``reset``.
            step_prediction: dict with ``out_inds`` and ``out_vec`` entries;
                both are flattened before use.

        Returns:
            The scene's updated ``last_frame``.
        """
        flat_inds = step_prediction['out_inds'].flatten()
        flat_vec = step_prediction['out_vec'].flatten()
        scene['out_inds'].append(flat_inds)
        scene['out_vecs'].append(flat_vec)
        new_frame, new_mask, new_label, used_patch = self.update_frame(
            scene['last_frame'], scene['last_mask'], scene['last_label'],
            flat_inds, flat_vec)
        scene['last_frame'] = new_frame
        scene['last_mask'] = new_mask
        scene['last_label'] = new_label
        scene['out_patches'].append(used_patch)
        return scene['last_frame']

    def update_frame(self, input_frame, input_mask, input_label, input_inds, input_vec):
        """Compose the patch selected by one prediction onto the canvases.

        Args:
            input_frame: current frame; it is modified in place.
            input_mask: current mask.
            input_label: current label map.
            input_inds: flat indices; element 0 is the class index and the
                remaining elements are decoded by ``db.index2box``.
            input_vec: query vector passed to the lookup tables.

        Returns:
            ``(frame, mask, label, patch)``.  ``patch`` is ``None`` and the
            inputs are returned unchanged when the class index is not above
            ``cfg.EOS_idx``.
        """
        cls_ind = input_inds[0]
        if cls_ind <= self.cfg.EOS_idx:
            return input_frame, input_mask, input_label, None

        h = input_frame.shape[-3]
        w = input_frame.shape[-2]
        # Scale the decoded box by the frame size before converting it.
        xywh = self.db.index2box(input_inds[1:])
        xywh = xywh * np.array([w, h, w, h])
        xyxy = xywh_to_xyxy(xywh, w, h)
        patch = self.nn_table.retrieve(cls_ind, input_vec)[0]

        chans = self._color_channels(cls_ind)
        input_frame[:, :, chans], input_mask, _, input_label, _ = \
            patch_compose_and_erose(input_frame[:, :, chans], input_mask,
                                    input_label, xyxy, patch, self.db)
        if not self.cfg.use_color_volume:
            # Channel -4 and the class channel accumulate the mask.
            input_frame[:, :, -4] = np.maximum(255 * input_mask, input_frame[:, :, -4])
            input_frame[:, :, cls_ind] = np.maximum(255 * input_mask, input_frame[:, :, cls_ind])
        return input_frame, input_mask, input_label, patch
def _crn_output_dirs(data_dir, split, year):
    """Return the (images, noices, labels, masks) output folders for a split.

    The 'test' and 'aux' splits are written into the 'train' + year folders;
    every other split uses its own name.
    """
    subset = 'train' if split in ('test', 'aux') else split
    return tuple(
        osp.join(data_dir, kind, subset + year)
        for kind in ('crn_images', 'crn_noices', 'crn_labels', 'crn_masks'))


def generate_simulated_scenes(config, split, year):
    """Write simulated image / noise / mask / label files for a slice of scenes.

    For every processed scene, each instance mask is intersected with the
    resized mask of a patch retrieved from the 2017 training set, the
    instance is composited onto black canvases, and four files named after
    the zero-padded image index are written.

    Args:
        config: project config; ``data_dir``, ``seed`` and
            ``use_patch_background`` are read here.
        split: dataset split passed to ``coco``.
        year: dataset year string, appended to the output folder names.

    Raises:
        IOError: if the color image of a scene that has instances cannot
            be read.
    """
    db = coco(config, split, year)
    data_dir = osp.join(config.data_dir, 'coco')
    images_dir, noices_dir, labels_dir, masks_dir = _crn_output_dirs(
        data_dir, split, year)
    for out_dir in (images_dir, noices_dir, labels_dir, masks_dir):
        maybe_create(out_dir)

    traindb = coco(config, 'train', '2017')
    nn_tables = AllCategoriesTables(traindb)
    nn_tables.build_nntables_for_all_categories(True)

    # Each seed handles its own window of 14000 scenes starting at 25000.
    # The window is clamped to the database size so that the last window
    # ends cleanly instead of raising IndexError.
    start_ind = 25000 + 14000 * config.seed
    end_ind = min(25000 + 14000 * (config.seed + 1), len(db.scenedb))
    patches_per_class = traindb.patches_per_class
    color_transfer_threshold = 0.8

    for i in range(start_ind, end_ind):
        entry = db.scenedb[i]
        width = entry['width']
        height = entry['height']
        xywhs = entry['boxes']
        masks = entry['masks']
        clses = entry['clses']
        image_index = entry['image_index']
        instance_inds = entry['instance_inds']

        full_mask = np.zeros((height, width), dtype=np.float32)
        full_label = np.zeros((height, width), dtype=np.float32)
        full_image = np.zeros((height, width, 3), dtype=np.float32)
        full_noice = np.zeros((height, width, 3), dtype=np.float32)

        image_path = db.color_path_from_index(image_index)
        original_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if original_image is None:
            # cv2.imread signals failure by returning None; the image is
            # only needed when there is at least one instance to paste.
            if len(masks) > 0:
                raise IOError('Failed to read image: %s' % image_path)
        else:
            # Convert once per image; every instance works on its own copy.
            original_image = original_image.astype(np.float32)

        # Kernel used to widen the mask boundary.  The size is clamped to
        # at least 1 because OpenCV rejects a zero-sized structuring element
        # (which happened when the shorter image side was below 20 pixels).
        radius = max(1, int(0.05 * min(width, height)))
        morp_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                                (radius, radius))

        for xywh, mask, cls_idx, instance_ind in zip(xywhs, masks, clses,
                                                     instance_inds):
            src_img = original_image.copy()
            embed_path = db.patch_path_from_indices(
                image_index, instance_ind, 'patch_feature', 'pkl',
                config.use_patch_background)
            # NOTE: pickle.load can execute arbitrary code; only feature
            # files produced by this project should be loaded here.
            with open(embed_path, 'rb') as fid:
                query_vector = pickle.load(fid)
            n_samples = min(100, len(patches_per_class[cls_idx]))
            candidate_patches = nn_tables.retrieve(cls_idx, query_vector,
                                                   n_samples)
            # Never pick the query instance itself as its own replacement.
            candidate_patches = [
                x for x in candidate_patches
                if x['instance_ind'] != instance_ind
            ]
            assert len(candidate_patches) > 1

            candidate_patch = find_closest_patch(db, traindb, image_index,
                                                 instance_ind,
                                                 candidate_patches)

            # stenciling: intersect the instance mask with the candidate's
            # mask resized to the instance box
            src_mask = COCOmask.decode(mask)
            dst_mask = COCOmask.decode(candidate_patch['mask'])
            src_xyxy = xywh_to_xyxy(xywh, width, height)
            dst_xyxy = xywh_to_xyxy(candidate_patch['box'],
                                    candidate_patch['width'],
                                    candidate_patch['height'])
            # Boxes are inclusive on both ends, hence the +1.
            src_roi = (slice(src_xyxy[1], src_xyxy[3] + 1),
                       slice(src_xyxy[0], src_xyxy[2] + 1))
            dst_roi = (slice(dst_xyxy[1], dst_xyxy[3] + 1),
                       slice(dst_xyxy[0], dst_xyxy[2] + 1))
            src_size = (src_xyxy[2] - src_xyxy[0] + 1,
                        src_xyxy[3] - src_xyxy[1] + 1)
            dst_mask = cv2.resize(dst_mask[dst_roi], src_size,
                                  interpolation=cv2.INTER_NEAREST)
            src_mask[src_roi] = np.minimum(dst_mask, src_mask[src_roi])

            # color transfer, applied when the draw exceeds the threshold
            if random.random() > color_transfer_threshold:
                candidate_index = candidate_patch['image_index']
                candidate_image = cv2.imread(
                    traindb.color_path_from_index(candidate_index),
                    cv2.IMREAD_COLOR).astype(np.float32)
                candidate_cropped = cv2.resize(candidate_image[dst_roi],
                                               src_size,
                                               interpolation=cv2.INTER_CUBIC)
                original_cropped = src_img[src_roi]
                transfer_cropped = Monge_Kantorovitch_color_transfer(
                    original_cropped, candidate_cropped)
                src_img[src_roi] = transfer_cropped

            full_image = compose(full_image, src_img, src_mask)

            # boundary elision: paint a random half of the pixels around
            # the mask edge white before composing the noisy image
            if np.amin(src_mask) > 0:
                # A mask covering the whole image has no gradient; clear
                # its border so that an edge exists.
                src_mask[0, :] = 0
                src_mask[-1, :] = 0
                src_mask[:, 0] = 0
                src_mask[:, -1] = 0
            sobelx = cv2.Sobel(src_mask, cv2.CV_64F, 1, 0, ksize=3)
            sobely = cv2.Sobel(src_mask, cv2.CV_64F, 0, 1, ksize=3)
            sobel = np.abs(sobelx) + np.abs(sobely)
            edge = np.zeros_like(sobel)
            edge[sobel > 0.9] = 1.0
            edge = cv2.dilate(edge, morp_kernel, iterations=1)
            row, col = np.where(edge > 0)
            n_edge_pixels = len(row)
            pixel_indices = np.random.permutation(range(n_edge_pixels))
            pixel_indices = pixel_indices[:(n_edge_pixels // 2)]
            row = row[pixel_indices]
            col = col[pixel_indices]
            src_img[row, col, :] = 255

            full_mask = np.maximum(full_mask, src_mask)
            full_label[src_mask > 0] = cls_idx
            full_noice = compose(full_noice, src_img, src_mask)

        output_name = str(image_index).zfill(12)
        output_path = osp.join(images_dir, output_name + '.jpg')
        cv2.imwrite(output_path,
                    clamp_array(full_image, 0, 255).astype(np.uint8))
        output_path = osp.join(noices_dir, output_name + '.jpg')
        cv2.imwrite(output_path,
                    clamp_array(full_noice, 0, 255).astype(np.uint8))
        output_path = osp.join(masks_dir, output_name + '.png')
        cv2.imwrite(output_path,
                    clamp_array(255 * full_mask, 0, 255).astype(np.uint8))
        output_path = osp.join(labels_dir, output_name + '.png')
        cv2.imwrite(output_path, full_label.astype(np.uint8))
        print(i, image_index)